xref: /netbsd-src/sys/dev/raidframe/rf_netbsdkintf.c (revision 4817a0b0b8fe9612e8ebe21a9bf2d97b95038a97)
1 /*	$NetBSD: rf_netbsdkintf.c,v 1.279 2010/12/22 05:51:19 christos Exp $	*/
2 /*-
3  * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to The NetBSD Foundation
7  * by Greg Oster; Jason R. Thorpe.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 /*
32  * Copyright (c) 1990, 1993
33  *      The Regents of the University of California.  All rights reserved.
34  *
35  * This code is derived from software contributed to Berkeley by
36  * the Systems Programming Group of the University of Utah Computer
37  * Science Department.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  *
63  * from: Utah $Hdr: cd.c 1.6 90/11/28$
64  *
65  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
66  */
67 
68 /*
69  * Copyright (c) 1988 University of Utah.
70  *
71  * This code is derived from software contributed to Berkeley by
72  * the Systems Programming Group of the University of Utah Computer
73  * Science Department.
74  *
75  * Redistribution and use in source and binary forms, with or without
76  * modification, are permitted provided that the following conditions
77  * are met:
78  * 1. Redistributions of source code must retain the above copyright
79  *    notice, this list of conditions and the following disclaimer.
80  * 2. Redistributions in binary form must reproduce the above copyright
81  *    notice, this list of conditions and the following disclaimer in the
82  *    documentation and/or other materials provided with the distribution.
83  * 3. All advertising materials mentioning features or use of this software
84  *    must display the following acknowledgement:
85  *      This product includes software developed by the University of
86  *      California, Berkeley and its contributors.
87  * 4. Neither the name of the University nor the names of its contributors
88  *    may be used to endorse or promote products derived from this software
89  *    without specific prior written permission.
90  *
91  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
92  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
93  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
94  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
95  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
96  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
97  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
98  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
99  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
100  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
101  * SUCH DAMAGE.
102  *
103  * from: Utah $Hdr: cd.c 1.6 90/11/28$
104  *
105  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
106  */
107 
108 /*
109  * Copyright (c) 1995 Carnegie-Mellon University.
110  * All rights reserved.
111  *
112  * Authors: Mark Holland, Jim Zelenka
113  *
114  * Permission to use, copy, modify and distribute this software and
115  * its documentation is hereby granted, provided that both the copyright
116  * notice and this permission notice appear in all copies of the
117  * software, derivative works or modified versions, and any portions
118  * thereof, and that both notices appear in supporting documentation.
119  *
120  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
121  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
122  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
123  *
124  * Carnegie Mellon requests users of this software to return to
125  *
126  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
127  *  School of Computer Science
128  *  Carnegie Mellon University
129  *  Pittsburgh PA 15213-3890
130  *
131  * any improvements or extensions that they make and grant Carnegie the
132  * rights to redistribute these changes.
133  */
134 
135 /***********************************************************
136  *
137  * rf_kintf.c -- the kernel interface routines for RAIDframe
138  *
139  ***********************************************************/
140 
141 #include <sys/cdefs.h>
142 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.279 2010/12/22 05:51:19 christos Exp $");
143 
144 #ifdef _KERNEL_OPT
145 #include "opt_compat_netbsd.h"
146 #include "opt_raid_autoconfig.h"
147 #include "raid.h"
148 #endif
149 
150 #include <sys/param.h>
151 #include <sys/errno.h>
152 #include <sys/pool.h>
153 #include <sys/proc.h>
154 #include <sys/queue.h>
155 #include <sys/disk.h>
156 #include <sys/device.h>
157 #include <sys/stat.h>
158 #include <sys/ioctl.h>
159 #include <sys/fcntl.h>
160 #include <sys/systm.h>
161 #include <sys/vnode.h>
162 #include <sys/disklabel.h>
163 #include <sys/conf.h>
164 #include <sys/buf.h>
165 #include <sys/bufq.h>
166 #include <sys/reboot.h>
167 #include <sys/kauth.h>
168 
169 #include <prop/proplib.h>
170 
171 #include <dev/raidframe/raidframevar.h>
172 #include <dev/raidframe/raidframeio.h>
173 #include <dev/raidframe/rf_paritymap.h>
174 
175 #include "rf_raid.h"
176 #include "rf_copyback.h"
177 #include "rf_dag.h"
178 #include "rf_dagflags.h"
179 #include "rf_desc.h"
180 #include "rf_diskqueue.h"
181 #include "rf_etimer.h"
182 #include "rf_general.h"
183 #include "rf_kintf.h"
184 #include "rf_options.h"
185 #include "rf_driver.h"
186 #include "rf_parityscan.h"
187 #include "rf_threadstuff.h"
188 
189 #ifdef COMPAT_50
190 #include "rf_compat50.h"
191 #endif
192 
193 #ifdef DEBUG
194 int     rf_kdebug_level = 0;
195 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
196 #else				/* DEBUG */
197 #define db1_printf(a) { }
198 #endif				/* DEBUG */
199 
200 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
201 
202 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
203 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
204 
205 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
206 						 * spare table */
207 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
208 						 * installation process */
209 #endif
210 
211 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
212 
213 /* prototypes */
214 static void KernelWakeupFunc(struct buf *);
215 static void InitBP(struct buf *, struct vnode *, unsigned,
216     dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
217     void *, int, struct proc *);
218 static void raidinit(RF_Raid_t *);
219 
220 void raidattach(int);
221 static int raid_match(device_t, cfdata_t, void *);
222 static void raid_attach(device_t, device_t, void *);
223 static int raid_detach(device_t, int);
224 
225 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
226     daddr_t, daddr_t);
227 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
228     daddr_t, daddr_t, int);
229 
230 static int raidwrite_component_label(unsigned,
231     dev_t, struct vnode *, RF_ComponentLabel_t *);
232 static int raidread_component_label(unsigned,
233     dev_t, struct vnode *, RF_ComponentLabel_t *);
234 
235 
236 dev_type_open(raidopen);
237 dev_type_close(raidclose);
238 dev_type_read(raidread);
239 dev_type_write(raidwrite);
240 dev_type_ioctl(raidioctl);
241 dev_type_strategy(raidstrategy);
242 dev_type_dump(raiddump);
243 dev_type_size(raidsize);
244 
245 const struct bdevsw raid_bdevsw = {
246 	raidopen, raidclose, raidstrategy, raidioctl,
247 	raiddump, raidsize, D_DISK
248 };
249 
250 const struct cdevsw raid_cdevsw = {
251 	raidopen, raidclose, raidread, raidwrite, raidioctl,
252 	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
253 };
254 
255 static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
256 
257 /* XXX Not sure if the following should be replacing the raidPtrs above,
258    or if it should be used in conjunction with that...
259 */
260 
261 struct raid_softc {
262 	device_t sc_dev;
263 	int     sc_flags;	/* flags */
264 	int     sc_cflags;	/* configuration flags */
265 	uint64_t sc_size;	/* size of the raid device */
266 	char    sc_xname[20];	/* XXX external name */
267 	struct disk sc_dkdev;	/* generic disk device info */
268 	struct bufq_state *buf_queue;	/* used for the device queue */
269 };
270 /* sc_flags */
271 #define RAIDF_INITED	0x01	/* unit has been initialized */
272 #define RAIDF_WLABEL	0x02	/* label area is writable */
273 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
274 #define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
275 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
276 #define RAIDF_LOCKED	0x80	/* unit is locked */
277 
278 #define	raidunit(x)	DISKUNIT(x)
279 int numraid = 0;
280 
281 extern struct cfdriver raid_cd;
282 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
283     raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
284     DVF_DETACH_SHUTDOWN);
285 
286 /*
287  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
288  * Be aware that large numbers can allow the driver to consume a lot of
289  * kernel memory, especially on writes, and in degraded mode reads.
290  *
291  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
292  * a single 64K write will typically require 64K for the old data,
293  * 64K for the old parity, and 64K for the new parity, for a total
294  * of 192K (if the parity buffer is not re-used immediately).
295  * Even if it is used immediately, that's still 128K, which when multiplied
296  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
297  *
298  * Now in degraded mode, for example, a 64K read on the above setup may
299  * require data reconstruction, which will require *all* of the 4 remaining
300  * disks to participate -- 4 * 32K/disk == 128K again.
301  */
302 
303 #ifndef RAIDOUTSTANDING
304 #define RAIDOUTSTANDING   6
305 #endif
306 
307 #define RAIDLABELDEV(dev)	\
308 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
309 
310 /* declared here, and made public, for the benefit of KVM stuff.. */
311 struct raid_softc *raid_softc;
312 
313 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
314 				     struct disklabel *);
315 static void raidgetdisklabel(dev_t);
316 static void raidmakedisklabel(struct raid_softc *);
317 
318 static int raidlock(struct raid_softc *);
319 static void raidunlock(struct raid_softc *);
320 
321 static int raid_detach_unlocked(struct raid_softc *);
322 
323 static void rf_markalldirty(RF_Raid_t *);
324 static void rf_set_properties(struct raid_softc *, RF_Raid_t *);
325 
326 void rf_ReconThread(struct rf_recon_req *);
327 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
328 void rf_CopybackThread(RF_Raid_t *raidPtr);
329 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
330 int rf_autoconfig(device_t);
331 void rf_buildroothack(RF_ConfigSet_t *);
332 
333 RF_AutoConfig_t *rf_find_raid_components(void);
334 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
335 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
336 static int rf_reasonable_label(RF_ComponentLabel_t *);
337 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
338 int rf_set_autoconfig(RF_Raid_t *, int);
339 int rf_set_rootpartition(RF_Raid_t *, int);
340 void rf_release_all_vps(RF_ConfigSet_t *);
341 void rf_cleanup_config_set(RF_ConfigSet_t *);
342 int rf_have_enough_components(RF_ConfigSet_t *);
343 int rf_auto_config_set(RF_ConfigSet_t *, int *);
344 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
345 
346 static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
347 				  allow autoconfig to take place.
348 				  Note that this is overridden by having
349 				  RAID_AUTOCONFIG as an option in the
350 				  kernel config file.  */
351 
352 struct RF_Pools_s rf_pools;
353 
/*
 * raidattach -- pseudo-device attach routine, invoked at boot with the
 * number of RAID units ("num") requested in the kernel configuration.
 *
 * Allocates the global raidPtrs[] descriptor array and the raid_softc[]
 * array, boots the RAIDframe core, attaches the cfattach, and registers
 * a config finalizer so that autoconfiguration of RAID sets runs only
 * after all real hardware devices have been found.  On per-unit
 * allocation failure, numraid is trimmed to the number of units that
 * were actually set up.
 */
void
raidattach(int num)
{
	int raidID;
	int i, rc;

	aprint_debug("raidattach: Asked for %d units\n", num);

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_mutex_init(&rf_sparet_wait_mutex);

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
		malloc(num * sizeof(struct raid_softc),
		       M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		aprint_error("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID);
			/* Shrink numraid so later code never touches the
			   units we failed to allocate. */
			numraid = raidID;
			return;
		}
	}

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}
433 
434 int
435 rf_autoconfig(device_t self)
436 {
437 	RF_AutoConfig_t *ac_list;
438 	RF_ConfigSet_t *config_sets;
439 
440 	if (raidautoconfig == 0)
441 		return (0);
442 
443 	/* XXX This code can only be run once. */
444 	raidautoconfig = 0;
445 
446 	/* 1. locate all RAID components on the system */
447 	aprint_debug("Searching for RAID components...\n");
448 	ac_list = rf_find_raid_components();
449 
450 	/* 2. Sort them into their respective sets. */
451 	config_sets = rf_create_auto_sets(ac_list);
452 
453 	/*
454 	 * 3. Evaluate each set andconfigure the valid ones.
455 	 * This gets done in rf_buildroothack().
456 	 */
457 	rf_buildroothack(config_sets);
458 
459 	return 1;
460 }
461 
/*
 * rf_buildroothack -- configure all auto-configurable RAID sets and,
 * if exactly one configured set claims to be rootable, point
 * booted_device at it.  When several sets look rootable, consult the
 * MD code (cpu_rootconf()) and match the boot device against each
 * set's component names; if the answer is still ambiguous, fall back
 * to asking the user via RB_ASKNAME.
 *
 * Consumes (frees) the config_sets list in all cases.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int retcode;
	int raidID;
	int rootID;
	int col;
	int num_root;
	char *devname;

	rootID = 0;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		/* cset is freed by rf_cleanup_config_set(); grab next now. */
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure==1) {
			retcode = rf_auto_config_set(cset,&raidID);
			if (!retcode) {
				aprint_debug("raid%d: configured ok\n", raidID);
				if (cset->rootable) {
					rootID = raidID;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	if (num_root == 1) {
		booted_device = raid_softc[rootID].sc_dev;
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */

		if (booted_device == NULL)
			cpu_rootconf();
		if (booted_device == NULL)
			return;

		/* Re-count, keeping only sets containing the boot device. */
		num_root = 0;
		for (raidID = 0; raidID < numraid; raidID++) {
			if (raidPtrs[raidID]->valid == 0)
				continue;

			if (raidPtrs[raidID]->root_partition == 0)
				continue;

			for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
				devname = raidPtrs[raidID]->Disks[col].devname;
				/* skip the "/dev/" prefix before comparing */
				devname += sizeof("/dev/") - 1;
				if (strncmp(devname, device_xname(booted_device),
					    strlen(device_xname(booted_device))) != 0)
					continue;
				aprint_debug("raid%d includes boot device %s\n",
				       raidID, devname);
				num_root++;
				rootID = raidID;
			}
		}

		if (num_root == 1) {
			booted_device = raid_softc[rootID].sc_dev;
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
556 
557 
558 int
559 raidsize(dev_t dev)
560 {
561 	struct raid_softc *rs;
562 	struct disklabel *lp;
563 	int     part, unit, omask, size;
564 
565 	unit = raidunit(dev);
566 	if (unit >= numraid)
567 		return (-1);
568 	rs = &raid_softc[unit];
569 
570 	if ((rs->sc_flags & RAIDF_INITED) == 0)
571 		return (-1);
572 
573 	part = DISKPART(dev);
574 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
575 	lp = rs->sc_dkdev.dk_label;
576 
577 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
578 		return (-1);
579 
580 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
581 		size = -1;
582 	else
583 		size = lp->d_partitions[part].p_size *
584 		    (lp->d_secsize / DEV_BSIZE);
585 
586 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
587 		return (-1);
588 
589 	return (size);
590 
591 }
592 
/*
 * raiddump -- crash-dump entry point.  Writes "size" bytes from "va"
 * at block "blkno" of the dump partition, by dumping directly to one
 * live component of the set.  Only RAID 1 sets (1 data + 1 parity
 * column) are supported.
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int     part, c, sparecol, j, scol, dumpto;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);

	rs = &raid_softc[unit];
	raidPtr = raidPtrs[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* Refuse dumps that would run past the end of the device. */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
			/* How about this one? */
			/* Find which column this spare is standing in for. */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	*/

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
730 /* ARGSUSED */
/*
 * raidopen -- open routine for both the block and character devices.
 *
 * Validates the unit and partition, reads the disklabel on first open
 * of an initialized set, records the open in the appropriate open mask
 * (preventing unconfiguration while open), and marks components dirty
 * on the first open so a crash is later detectable as unclean.
 * Called with the unit lock taken/released internally via raidlock().
 */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	/* Unit is on its way out; refuse new opens. */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* First open of an initialized set: (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(raidPtrs[unit]);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
816 /* ARGSUSED */
817 int
818 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
819 {
820 	int     unit = raidunit(dev);
821 	struct raid_softc *rs;
822 	int     error = 0;
823 	int     part;
824 
825 	if (unit >= numraid)
826 		return (ENXIO);
827 	rs = &raid_softc[unit];
828 
829 	if ((error = raidlock(rs)) != 0)
830 		return (error);
831 
832 	part = DISKPART(dev);
833 
834 	/* ...that much closer to allowing unconfiguration... */
835 	switch (fmt) {
836 	case S_IFCHR:
837 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
838 		break;
839 
840 	case S_IFBLK:
841 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
842 		break;
843 	}
844 	rs->sc_dkdev.dk_openmask =
845 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
846 
847 	if ((rs->sc_dkdev.dk_openmask == 0) &&
848 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
849 		/* Last one... device is not unconfigured yet.
850 		   Device shutdown has taken care of setting the
851 		   clean bits if RAIDF_INITED is not set
852 		   mark things as clean... */
853 
854 		rf_update_component_labels(raidPtrs[unit],
855 						 RF_FINAL_COMPONENT_UPDATE);
856 
857 		/* If the kernel is shutting down, it will detach
858 		 * this RAID set soon enough.
859 		 */
860 	}
861 
862 	raidunlock(rs);
863 	return (0);
864 
865 }
866 
867 void
868 raidstrategy(struct buf *bp)
869 {
870 	int s;
871 
872 	unsigned int raidID = raidunit(bp->b_dev);
873 	RF_Raid_t *raidPtr;
874 	struct raid_softc *rs = &raid_softc[raidID];
875 	int     wlabel;
876 
877 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
878 		bp->b_error = ENXIO;
879 		goto done;
880 	}
881 	if (raidID >= numraid || !raidPtrs[raidID]) {
882 		bp->b_error = ENODEV;
883 		goto done;
884 	}
885 	raidPtr = raidPtrs[raidID];
886 	if (!raidPtr->valid) {
887 		bp->b_error = ENODEV;
888 		goto done;
889 	}
890 	if (bp->b_bcount == 0) {
891 		db1_printf(("b_bcount is zero..\n"));
892 		goto done;
893 	}
894 
895 	/*
896 	 * Do bounds checking and adjust transfer.  If there's an
897 	 * error, the bounds check will flag that for us.
898 	 */
899 
900 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
901 	if (DISKPART(bp->b_dev) == RAW_PART) {
902 		uint64_t size; /* device size in DEV_BSIZE unit */
903 
904 		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
905 			size = raidPtr->totalSectors <<
906 			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
907 		} else {
908 			size = raidPtr->totalSectors >>
909 			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
910 		}
911 		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
912 			goto done;
913 		}
914 	} else {
915 		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
916 			db1_printf(("Bounds check failed!!:%d %d\n",
917 				(int) bp->b_blkno, (int) wlabel));
918 			goto done;
919 		}
920 	}
921 	s = splbio();
922 
923 	bp->b_resid = 0;
924 
925 	/* stuff it onto our queue */
926 	bufq_put(rs->buf_queue, bp);
927 
928 	/* scheduled the IO to happen at the next convenient time */
929 	wakeup(&(raidPtrs[raidID]->iodone));
930 
931 	splx(s);
932 	return;
933 
934 done:
935 	bp->b_resid = bp->b_bcount;
936 	biodone(bp);
937 }
938 /* ARGSUSED */
939 int
940 raidread(dev_t dev, struct uio *uio, int flags)
941 {
942 	int     unit = raidunit(dev);
943 	struct raid_softc *rs;
944 
945 	if (unit >= numraid)
946 		return (ENXIO);
947 	rs = &raid_softc[unit];
948 
949 	if ((rs->sc_flags & RAIDF_INITED) == 0)
950 		return (ENXIO);
951 
952 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
953 
954 }
955 /* ARGSUSED */
956 int
957 raidwrite(dev_t dev, struct uio *uio, int flags)
958 {
959 	int     unit = raidunit(dev);
960 	struct raid_softc *rs;
961 
962 	if (unit >= numraid)
963 		return (ENXIO);
964 	rs = &raid_softc[unit];
965 
966 	if ((rs->sc_flags & RAIDF_INITED) == 0)
967 		return (ENXIO);
968 
969 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
970 
971 }
972 
/*
 * Tear down a RAID unit; caller must already hold the unit lock
 * (raidlock).  Fails with EBUSY if any partition is still open.
 * Otherwise shuts down the RAIDframe core state (when initialized)
 * and detaches and destroys the associated disk(9) structure.
 * Returns 0 on success or an errno from rf_Shutdown().
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = raidPtrs[device_unit(rs->sc_dev)];

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	return 0;
}
1001 
/*
 * raidioctl: ioctl entry point for the RAIDframe pseudo-device.
 *
 * Dispatches both RAIDframe-specific commands (configure, shutdown,
 * rebuild/copyback, status queries, component-label manipulation,
 * parity-map control) and the standard disk ioctls (disklabel,
 * wedges, cache sync).  Most RAIDFRAME_* commands return directly
 * from their case; only the generic disk ioctls fall through to the
 * disk_ioctl()/disklabel handling at the bottom.
 */
int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int     unit = raidunit(dev);
	int     error = 0;
	int     part, pmask;
	cfdata_t cf;
	struct raid_softc *rs;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, **ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
/*	int raidid; */
	struct rf_recon_req *rrcopy, *rr;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_ComponentLabel_t **clabel_ptr;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t component;
	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
	int i, j, d;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif
	struct dkwedge_info *dkw;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];
	raidPtr = raidPtrs[unit];

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
		(int) DISKPART(dev), (int) unit, cmd));

	/* Must be open for writes for these commands... */
	switch (cmd) {
#ifdef DIOCGSECTORSIZE
	case DIOCGSECTORSIZE:
		*(u_int *)data = raidPtr->bytesPerSector;
		return 0;
	case DIOCGMEDIASIZE:
		*(off_t *)data =
		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
		return 0;
#endif
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	case DIOCWLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCWDINFO:
	case ODIOCSDINFO:
	case ODIOCGDEFLABEL:
#endif
	case DIOCGPART:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCLWEDGES:
	case DIOCCACHESYNC:
	case RAIDFRAME_SHUTDOWN:
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
#ifdef COMPAT_50
	case RAIDFRAME_GET_INFO50:
		return rf_get_info50(raidPtr, data);

	case RAIDFRAME_CONFIGURE50:
		/* COMPAT_50 path converts the old config in rf_config50()
		 * and then joins the common configuration code below. */
		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
			return retcode;
		goto config;
#endif
		/* configure the system */
	case RAIDFRAME_CONFIGURE:

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n",unit);
			return(EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

		u_cfg = *((RF_Config_t **) data);
		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
				retcode));
			return (retcode);
		}
		goto config;
	config:
		/* Common configuration path: on entry k_cfg is a kernel
		 * copy of the user's RF_Config_t and must be freed on all
		 * exits from here on. */
		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				return (EINVAL);
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				return (ENOMEM);
			}
			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
					k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
					retcode));
				return (retcode);
			}
		} else
			specific_buf = NULL;
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 *  there is no stale data left in the case of a
		 *  reconfiguration
		 */
		memset(raidPtr, 0, sizeof(*raidPtr));
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(raidPtr);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((error = raidlock(rs)) != 0)
			return (error);

		/* Busy if any partition other than the one the ioctl came
		 * in on is open, or if that partition is open both as a
		 * block and a character device. */
		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
			(rs->sc_dkdev.dk_copenmask & pmask)))
			retcode = EBUSY;
		else {
			rs->sc_flags |= RAIDF_SHUTDOWN;
			rs->sc_dkdev.dk_copenmask &= ~pmask;
			rs->sc_dkdev.dk_bopenmask &= ~pmask;
			rs->sc_dkdev.dk_openmask &= ~pmask;
			retcode = 0;
		}

		raidunlock(rs);

		if (retcode != 0)
			return retcode;

		/* free the pseudo device attach bits */

		cf = device_cfdata(rs->sc_dev);
		if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
			free(cf, M_RAIDFRAME);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		clabel_ptr = (RF_ComponentLabel_t **) data;
		/* need to read the component label for the disk indicated
		   by row,column in clabel */

		/*
		 * Perhaps there should be an option to skip the in-core
		 * copy and hit the disk, as with disklabel(8).
		 */
		RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));

		retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));

		if (retcode) {
			RF_Free(clabel, sizeof(*clabel));
			return retcode;
		}

		clabel->row = 0; /* Don't allow looking at anything else.*/

		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol +
		    raidPtr->numSpare)) {
			RF_Free(clabel, sizeof(*clabel));
			return EINVAL;
		}

		/* The temporary copy was only needed for the column
		 * number; the in-core label is what we return. */
		RF_Free(clabel, sizeof(*clabel));

		clabel = raidget_component_label(raidPtr, column);

		return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));

#if 0
	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		raidid = raidPtr->raidid;
#ifdef DEBUG
		printf("raid%d: Got component label:\n", raidid);
		printf("raid%d: Version: %d\n", raidid, clabel->version);
		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
		printf("raid%d: Column: %d\n", raidid, clabel->column);
		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
		printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif
		clabel->row = 0;
		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
		memcpy(raidget_component_label(raidPtr, column),
		    clabel, sizeof(*clabel));
		raidflush_component_label(raidPtr, column);
		return (0);
#endif

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		   */

		raidPtr->serial_number = clabel->serial_number;

		for(column=0;column<raidPtr->numCol;column++) {
			diskPtr = &raidPtr->Disks[column];
			if (!RF_DEAD_DISK(diskPtr->status)) {
				ci_label = raidget_component_label(raidPtr,
				    column);
				/* Zeroing this is important. */
				memset(ci_label, 0, sizeof(*ci_label));
				raid_init_component_label(raidPtr, ci_label);
				ci_label->serial_number =
				    raidPtr->serial_number;
				ci_label->row = 0; /* we don't pretend to support more */
				ci_label->partitionSize =
				    diskPtr->partitionSize;
				ci_label->column = column;
				raidflush_component_label(raidPtr, column);
			}
			/* XXXjld what about the spares? */
		}

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return(0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return(EINVAL);
		}

		/* The rewrite runs asynchronously in its own kthread. */
		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
					   rf_RewriteParityThread,
					   raidPtr,"raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		/* Not implemented: retcode is still 0 here, so this
		 * silently reports success without doing anything. */
		return(retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return(EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		component.row = 0; /* we don't support any more */
		column = component.column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		RF_LOCK_MUTEX(raidPtr->mutex);
		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant!!! */
			/* some component other than this has failed.
			   Let's not make things worse than they already
			   are... */
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:     Col: %d   Too many failures.\n",
			       raidPtr->raidid, column);
			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status ==
		    rf_ds_reconstructing) {
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:    Col: %d   Reconstruction already occuring!\n", raidPtr->raidid, column);

			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status == rf_ds_spared) {
			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (EINVAL);
		}
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* The request outlives this ioctl, so hand the recon
		 * thread its own heap copy.  The thread owns rrcopy. */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);

		rrcopy->raidPtr = (void *) raidPtr;
		rrcopy->col = column;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconstructInPlaceThread,
					   rrcopy,"raid_reconip");
		return(retcode);

	case RAIDFRAME_GET_INFO:
		if (!raidPtr->valid)
			return (ENODEV);
		ucfgp = (RF_DeviceConfig_t **) data;
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
			  (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		d_cfg->rows = 1; /* there is only 1 row now */
		d_cfg->cols = raidPtr->numCol;
		d_cfg->ndevs = raidPtr->numCol;
		if (d_cfg->ndevs >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->nspares = raidPtr->numSpare;
		if (d_cfg->nspares >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
		d = 0;
		for (j = 0; j < d_cfg->cols; j++) {
			d_cfg->devs[d] = raidPtr->Disks[j];
			d++;
		}
		/* Spares live in Disks[] after the data columns. */
		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
			d_cfg->spares[i] = raidPtr->Disks[j];
		}
		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map,
		    (struct rf_pmstat *)data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
			(struct rf_pmparams *)data, 1))
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return (0);

		/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		rr = (struct rf_recon_req *) data;
		rr->row = 0;
		if (rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);


		RF_LOCK_MUTEX(raidPtr->mutex);
		if (raidPtr->status == rf_rs_reconstructing) {
			/* you can't fail a disk while we're reconstructing! */
			/* XXX wrong for RAID6 */
			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (EINVAL);
		}
		if ((raidPtr->Disks[rr->col].status ==
		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
			/* some other component has failed.  Let's not make
			   things worse. XXX wrong for RAID6 */
			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
			/* Can't fail a spared disk! */
			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (EINVAL);
		}
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);
		memcpy(rrcopy, rr, sizeof(*rr));
		rrcopy->raidPtr = (void *) raidPtr;

		/* NOTE(review): thread-creation status is ignored here;
		 * success is reported even if RF_CREATE_THREAD failed. */
		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconThread,
					   rrcopy,"raid_recon");
		return (0);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
					   rf_CopybackThread,
					   raidPtr,"raid_copyback");
		return (retcode);

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->status != rf_rs_reconstructing)
			*(int *) data = 100;
		else {
			if (raidPtr->reconControl->numRUsTotal > 0) {
				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
			} else {
				*(int *) data = 0;
			}
		}
		return (0);
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->status != rf_rs_reconstructing) {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		} else {
			progressInfo.total =
				raidPtr->reconControl->numRUsTotal;
			progressInfo.completed =
				raidPtr->reconControl->numRUsComplete;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
				raidPtr->parity_rewrite_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->parity_rewrite_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->parity_rewrite_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->copyback_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->copyback_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table     -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
		wakeup(&rf_sparet_wait_queue);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		wakeup(&rf_sparet_resp_queue);
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	/* Everything from here down requires a configured set. */
	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return (error);

	switch (cmd) {
	case DIOCGDINFO:
		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
		break;
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
		newlabel = *(rs->sc_dkdev.dk_label);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGPART:
		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
		((struct partinfo *) data)->part =
		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;
#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		if ((error = raidlock(rs)) != 0)
			return (error);

		rs->sc_flags |= RAIDF_LABELLING;

		error = setdisklabel(rs->sc_dkdev.dk_label,
		    lp, 0, rs->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(RAIDLABELDEV(dev),
				    raidstrategy, rs->sc_dkdev.dk_label,
				    rs->sc_dkdev.dk_cpulabel);
		}
		rs->sc_flags &= ~RAIDF_LABELLING;

		raidunlock(rs);

		if (error)
			return (error);
		break;
	}

	case DIOCWLABEL:
		if (*(int *) data != 0)
			rs->sc_flags |= RAIDF_WLABEL;
		else
			rs->sc_flags &= ~RAIDF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCAWEDGE:
	case DIOCDWEDGE:
	    	dkw = (void *)data;

		/* If the ioctl happens here, the parent is us. */
		(void)strcpy(dkw->dkw_parent, rs->sc_xname);
		return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);

	case DIOCLWEDGES:
		return dkwedge_list(&rs->sc_dkdev,
		    (struct dkwedge_list *)data, l);
	case DIOCCACHESYNC:
		return rf_sync_component_caches(raidPtr);
	default:
		retcode = ENOTTY;
	}
	return (retcode);

}
1891 
1892 
1893 /* raidinit -- complete the rest of the initialization for the
1894    RAIDframe device.  */
1895 
1896 
1897 static void
1898 raidinit(RF_Raid_t *raidPtr)
1899 {
1900 	cfdata_t cf;
1901 	struct raid_softc *rs;
1902 	int     unit;
1903 
1904 	unit = raidPtr->raidid;
1905 
1906 	rs = &raid_softc[unit];
1907 
1908 	/* XXX should check return code first... */
1909 	rs->sc_flags |= RAIDF_INITED;
1910 
1911 	/* XXX doesn't check bounds. */
1912 	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1913 
1914 	/* attach the pseudo device */
1915 	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1916 	cf->cf_name = raid_cd.cd_name;
1917 	cf->cf_atname = raid_cd.cd_name;
1918 	cf->cf_unit = unit;
1919 	cf->cf_fstate = FSTATE_STAR;
1920 
1921 	rs->sc_dev = config_attach_pseudo(cf);
1922 
1923 	if (rs->sc_dev == NULL) {
1924 		printf("raid%d: config_attach_pseudo failed\n",
1925 		    raidPtr->raidid);
1926 		rs->sc_flags &= ~RAIDF_INITED;
1927 		free(cf, M_RAIDFRAME);
1928 		return;
1929 	}
1930 
1931 	/* disk_attach actually creates space for the CPU disklabel, among
1932 	 * other things, so it's critical to call this *BEFORE* we try putzing
1933 	 * with disklabels. */
1934 
1935 	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
1936 	disk_attach(&rs->sc_dkdev);
1937 	disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);
1938 
1939 	/* XXX There may be a weird interaction here between this, and
1940 	 * protectedSectors, as used in RAIDframe.  */
1941 
1942 	rs->sc_size = raidPtr->totalSectors;
1943 
1944 	dkwedge_discover(&rs->sc_dkdev);
1945 
1946 	rf_set_properties(rs, raidPtr);
1947 
1948 }
1949 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1950 /* wake up the daemon & tell it to get us a spare table
1951  * XXX
1952  * the entries in the queues should be tagged with the raidPtr
1953  * so that in the extremely rare case that two recons happen at once,
1954  * we know for which device were requesting a spare table
1955  * XXX
1956  *
1957  * XXX This code is not currently used. GO
1958  */
/*
 * rf_GetSpareTableFromDaemon: hand a spare-table request to the
 * user-space sparetable daemon and block until it delivers a reply.
 * The request is enqueued on rf_sparet_wait_queue and the response is
 * picked up from rf_sparet_resp_queue; the daemon's status comes back
 * in the response's fcol field.
 *
 * NOTE(review): the "mpsleep unlocks the mutex" comment is stale --
 * this now uses tsleep(), and nothing visibly releases
 * rf_sparet_wait_mutex before sleeping.  Per the comment above the
 * function this code is not currently used; confirm the locking
 * before reviving it.
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int     retcode;

	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	/* Enqueue the request and poke the daemon waiting in
	 * RAIDFRAME_SPARET_WAIT. */
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* Pop the daemon's response off the queue. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* The installation status travels in fcol. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1983 #endif
1984 
1985 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1986  * bp & passes it down.
1987  * any calls originating in the kernel must use non-blocking I/O
1988  * do some extra sanity checking to return "appropriate" error values for
1989  * certain conditions (to make some standard utilities work)
1990  *
1991  * Formerly known as: rf_DoAccessKernel
1992  */
1993 void
1994 raidstart(RF_Raid_t *raidPtr)
1995 {
1996 	RF_SectorCount_t num_blocks, pb, sum;
1997 	RF_RaidAddr_t raid_addr;
1998 	struct partition *pp;
1999 	daddr_t blocknum;
2000 	int     unit;
2001 	struct raid_softc *rs;
2002 	int     do_async;
2003 	struct buf *bp;
2004 	int rc;
2005 
2006 	unit = raidPtr->raidid;
2007 	rs = &raid_softc[unit];
2008 
2009 	/* quick check to see if anything has died recently */
2010 	RF_LOCK_MUTEX(raidPtr->mutex);
2011 	if (raidPtr->numNewFailures > 0) {
2012 		RF_UNLOCK_MUTEX(raidPtr->mutex);
2013 		rf_update_component_labels(raidPtr,
2014 					   RF_NORMAL_COMPONENT_UPDATE);
2015 		RF_LOCK_MUTEX(raidPtr->mutex);
2016 		raidPtr->numNewFailures--;
2017 	}
2018 
2019 	/* Check to see if we're at the limit... */
2020 	while (raidPtr->openings > 0) {
2021 		RF_UNLOCK_MUTEX(raidPtr->mutex);
2022 
2023 		/* get the next item, if any, from the queue */
2024 		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
2025 			/* nothing more to do */
2026 			return;
2027 		}
2028 
2029 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
2030 		 * partition.. Need to make it absolute to the underlying
2031 		 * device.. */
2032 
2033 		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
2034 		if (DISKPART(bp->b_dev) != RAW_PART) {
2035 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
2036 			blocknum += pp->p_offset;
2037 		}
2038 
2039 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
2040 			    (int) blocknum));
2041 
2042 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
2043 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
2044 
2045 		/* *THIS* is where we adjust what block we're going to...
2046 		 * but DO NOT TOUCH bp->b_blkno!!! */
2047 		raid_addr = blocknum;
2048 
2049 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
2050 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
2051 		sum = raid_addr + num_blocks + pb;
2052 		if (1 || rf_debugKernelAccess) {
2053 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
2054 				    (int) raid_addr, (int) sum, (int) num_blocks,
2055 				    (int) pb, (int) bp->b_resid));
2056 		}
2057 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
2058 		    || (sum < num_blocks) || (sum < pb)) {
2059 			bp->b_error = ENOSPC;
2060 			bp->b_resid = bp->b_bcount;
2061 			biodone(bp);
2062 			RF_LOCK_MUTEX(raidPtr->mutex);
2063 			continue;
2064 		}
2065 		/*
2066 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
2067 		 */
2068 
2069 		if (bp->b_bcount & raidPtr->sectorMask) {
2070 			bp->b_error = EINVAL;
2071 			bp->b_resid = bp->b_bcount;
2072 			biodone(bp);
2073 			RF_LOCK_MUTEX(raidPtr->mutex);
2074 			continue;
2075 
2076 		}
2077 		db1_printf(("Calling DoAccess..\n"));
2078 
2079 
2080 		RF_LOCK_MUTEX(raidPtr->mutex);
2081 		raidPtr->openings--;
2082 		RF_UNLOCK_MUTEX(raidPtr->mutex);
2083 
2084 		/*
2085 		 * Everything is async.
2086 		 */
2087 		do_async = 1;
2088 
2089 		disk_busy(&rs->sc_dkdev);
2090 
2091 		/* XXX we're still at splbio() here... do we *really*
2092 		   need to be? */
2093 
2094 		/* don't ever condition on bp->b_flags & B_WRITE.
2095 		 * always condition on B_READ instead */
2096 
2097 		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
2098 				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
2099 				 do_async, raid_addr, num_blocks,
2100 				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
2101 
2102 		if (rc) {
2103 			bp->b_error = rc;
2104 			bp->b_resid = bp->b_bcount;
2105 			biodone(bp);
2106 			/* continue loop */
2107 		}
2108 
2109 		RF_LOCK_MUTEX(raidPtr->mutex);
2110 	}
2111 	RF_UNLOCK_MUTEX(raidPtr->mutex);
2112 }
2113 
2114 
2115 
2116 
2117 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
2118 
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* Fake an immediate completion so the normal completion
		 * path runs for this NOP request. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		/* Start timing the physical I/O for this access' trace. */
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up the buf for the transfer to this component;
		 * KernelWakeupFunc() will run at biodone time with req
		 * stashed in b_private. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
2192 /* this is the callback function associated with a I/O invoked from
2193    kernel code.
2194  */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	/* The request was stashed in b_private by InitBP() via
	 * rf_DispatchKernelIO(). */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

#if RF_ACC_TRACE > 0
	/* Charge the elapsed disk time to this access' trace record. */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* numNewFailures is what raidstart() checks to
			 * trigger a component-label update. */
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */

	req->error = bp->b_error;

	simple_lock(&queue->raidPtr->iodone_lock);

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	wakeup(&(queue->raidPtr->iodone));

	simple_unlock(&queue->raidPtr->iodone_lock);

	splx(s);
}
2264 
2265 
2266 
2267 /*
2268  * initialize a buf structure for doing an I/O in the kernel.
2269  */
2270 static void
2271 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2272        RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2273        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2274        struct proc *b_proc)
2275 {
2276 	/* bp->b_flags       = B_PHYS | rw_flag; */
2277 	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
2278 	bp->b_oflags = 0;
2279 	bp->b_cflags = 0;
2280 	bp->b_bcount = numSect << logBytesPerSector;
2281 	bp->b_bufsize = bp->b_bcount;
2282 	bp->b_error = 0;
2283 	bp->b_dev = dev;
2284 	bp->b_data = bf;
2285 	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2286 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
2287 	if (bp->b_bcount == 0) {
2288 		panic("bp->b_bcount is zero in InitBP!!");
2289 	}
2290 	bp->b_proc = b_proc;
2291 	bp->b_iodone = cbFunc;
2292 	bp->b_private = cbArg;
2293 }
2294 
2295 static void
2296 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2297 		    struct disklabel *lp)
2298 {
2299 	memset(lp, 0, sizeof(*lp));
2300 
2301 	/* fabricate a label... */
2302 	lp->d_secperunit = raidPtr->totalSectors;
2303 	lp->d_secsize = raidPtr->bytesPerSector;
2304 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2305 	lp->d_ntracks = 4 * raidPtr->numCol;
2306 	lp->d_ncylinders = raidPtr->totalSectors /
2307 		(lp->d_nsectors * lp->d_ntracks);
2308 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2309 
2310 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2311 	lp->d_type = DTYPE_RAID;
2312 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2313 	lp->d_rpm = 3600;
2314 	lp->d_interleave = 1;
2315 	lp->d_flags = 0;
2316 
2317 	lp->d_partitions[RAW_PART].p_offset = 0;
2318 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2319 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2320 	lp->d_npartitions = RAW_PART + 1;
2321 
2322 	lp->d_magic = DISKMAGIC;
2323 	lp->d_magic2 = DISKMAGIC;
2324 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2325 
2326 }
2327 /*
2328  * Read the disklabel from the raid device.  If one is not present, fake one
2329  * up.
2330  */
static void
raidgetdisklabel(dev_t dev)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char   *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* Start from a fabricated default, then try to read the real
	 * label off the raid device. */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* No label on disk; install a default one. */
		raidmakedisklabel(rs);
	else {
		int     i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%" PRIu32 ") != "
			    "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
			    lp->d_secperunit, rs->sc_size);
		/* Warn (but don't reject) about partitions that run past
		 * the end of the raid. */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				       "exceeds the size of raid (%" PRIu64 ")\n",
				       unit, rs->sc_xname, 'a' + i, rs->sc_size);
		}
	}

}
2383 /*
2384  * Take care of things one might want to take care of in the event
2385  * that a disklabel isn't present.
2386  */
2387 static void
2388 raidmakedisklabel(struct raid_softc *rs)
2389 {
2390 	struct disklabel *lp = rs->sc_dkdev.dk_label;
2391 	db1_printf(("Making a label..\n"));
2392 
2393 	/*
2394 	 * For historical reasons, if there's no disklabel present
2395 	 * the raw partition must be marked FS_BSDFFS.
2396 	 */
2397 
2398 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2399 
2400 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2401 
2402 	lp->d_checksum = dkcksum(lp);
2403 }
2404 /*
2405  * Wait interruptibly for an exclusive lock.
2406  *
2407  * XXX
2408  * Several drivers do this; it should be abstracted and made MP-safe.
2409  * (Hmm... where have we seen this warning before :->  GO )
2410  */
2411 static int
2412 raidlock(struct raid_softc *rs)
2413 {
2414 	int     error;
2415 
2416 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2417 		rs->sc_flags |= RAIDF_WANTED;
2418 		if ((error =
2419 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2420 			return (error);
2421 	}
2422 	rs->sc_flags |= RAIDF_LOCKED;
2423 	return (0);
2424 }
2425 /*
2426  * Unlock and wake up any waiters.
2427  */
2428 static void
2429 raidunlock(struct raid_softc *rs)
2430 {
2431 
2432 	rs->sc_flags &= ~RAIDF_LOCKED;
2433 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2434 		rs->sc_flags &= ~RAIDF_WANTED;
2435 		wakeup(rs);
2436 	}
2437 }
2438 
2439 
2440 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
2441 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
2442 #define RF_PARITY_MAP_SIZE   RF_PARITYMAP_NBYTE
2443 
static daddr_t
rf_component_info_offset(void)
{

	/* Byte offset on each component at which the component info
	 * (label) area lives. */
	return RF_COMPONENT_INFO_OFFSET;
}
2450 
2451 static daddr_t
2452 rf_component_info_size(unsigned secsize)
2453 {
2454 	daddr_t info_size;
2455 
2456 	KASSERT(secsize);
2457 	if (secsize > RF_COMPONENT_INFO_SIZE)
2458 		info_size = secsize;
2459 	else
2460 		info_size = RF_COMPONENT_INFO_SIZE;
2461 
2462 	return info_size;
2463 }
2464 
2465 static daddr_t
2466 rf_parity_map_offset(RF_Raid_t *raidPtr)
2467 {
2468 	daddr_t map_offset;
2469 
2470 	KASSERT(raidPtr->bytesPerSector);
2471 	if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2472 		map_offset = raidPtr->bytesPerSector;
2473 	else
2474 		map_offset = RF_COMPONENT_INFO_SIZE;
2475 	map_offset += rf_component_info_offset();
2476 
2477 	return map_offset;
2478 }
2479 
2480 static daddr_t
2481 rf_parity_map_size(RF_Raid_t *raidPtr)
2482 {
2483 	daddr_t map_size;
2484 
2485 	if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2486 		map_size = raidPtr->bytesPerSector;
2487 	else
2488 		map_size = RF_PARITY_MAP_SIZE;
2489 
2490 	return map_size;
2491 }
2492 
2493 int
2494 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2495 {
2496 	RF_ComponentLabel_t *clabel;
2497 
2498 	clabel = raidget_component_label(raidPtr, col);
2499 	clabel->clean = RF_RAID_CLEAN;
2500 	raidflush_component_label(raidPtr, col);
2501 	return(0);
2502 }
2503 
2504 
2505 int
2506 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2507 {
2508 	RF_ComponentLabel_t *clabel;
2509 
2510 	clabel = raidget_component_label(raidPtr, col);
2511 	clabel->clean = RF_RAID_DIRTY;
2512 	raidflush_component_label(raidPtr, col);
2513 	return(0);
2514 }
2515 
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	/* Re-read the on-disk component label for 'col' into the
	 * in-core copy.  Returns the raidread_component_label() error. */
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2525 
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	/* Return the in-core component label for 'col'; does not touch
	 * the disk (see raidfetch_component_label() for that). */
	return &raidPtr->raid_cinfo[col].ci_label;
}
2531 
2532 int
2533 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2534 {
2535 	RF_ComponentLabel_t *label;
2536 
2537 	label = &raidPtr->raid_cinfo[col].ci_label;
2538 	label->mod_counter = raidPtr->mod_counter;
2539 #ifndef RF_NO_PARITY_MAP
2540 	label->parity_map_modcount = label->mod_counter;
2541 #endif
2542 	return raidwrite_component_label(raidPtr->bytesPerSector,
2543 	    raidPtr->Disks[col].dev,
2544 	    raidPtr->raid_cinfo[col].ci_vp, label);
2545 }
2546 
2547 
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	/* Read the component label from its fixed area on the component
	 * into *clabel. */
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2557 
2558 /* ARGSUSED */
2559 static int
2560 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2561     size_t msize, daddr_t offset, daddr_t dsize)
2562 {
2563 	struct buf *bp;
2564 	const struct bdevsw *bdev;
2565 	int error;
2566 
2567 	/* XXX should probably ensure that we don't try to do this if
2568 	   someone has changed rf_protected_sectors. */
2569 
2570 	if (b_vp == NULL) {
2571 		/* For whatever reason, this component is not valid.
2572 		   Don't try to read a component label from it. */
2573 		return(EINVAL);
2574 	}
2575 
2576 	/* get a block of the appropriate size... */
2577 	bp = geteblk((int)dsize);
2578 	bp->b_dev = dev;
2579 
2580 	/* get our ducks in a row for the read */
2581 	bp->b_blkno = offset / DEV_BSIZE;
2582 	bp->b_bcount = dsize;
2583 	bp->b_flags |= B_READ;
2584  	bp->b_resid = dsize;
2585 
2586 	bdev = bdevsw_lookup(bp->b_dev);
2587 	if (bdev == NULL)
2588 		return (ENXIO);
2589 	(*bdev->d_strategy)(bp);
2590 
2591 	error = biowait(bp);
2592 
2593 	if (!error) {
2594 		memcpy(data, bp->b_data, msize);
2595 	}
2596 
2597 	brelse(bp, 0);
2598 	return(error);
2599 }
2600 
2601 
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	/* Write *clabel to the component's fixed label area.
	 * Synchronous (asyncp == 0). */
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2611 
2612 /* ARGSUSED */
2613 static int
2614 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2615     size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2616 {
2617 	struct buf *bp;
2618 	const struct bdevsw *bdev;
2619 	int error;
2620 
2621 	/* get a block of the appropriate size... */
2622 	bp = geteblk((int)dsize);
2623 	bp->b_dev = dev;
2624 
2625 	/* get our ducks in a row for the write */
2626 	bp->b_blkno = offset / DEV_BSIZE;
2627 	bp->b_bcount = dsize;
2628 	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2629  	bp->b_resid = dsize;
2630 
2631 	memset(bp->b_data, 0, dsize);
2632 	memcpy(bp->b_data, data, msize);
2633 
2634 	bdev = bdevsw_lookup(bp->b_dev);
2635 	if (bdev == NULL)
2636 		return (ENXIO);
2637 	(*bdev->d_strategy)(bp);
2638 	if (asyncp)
2639 		return 0;
2640 	error = biowait(bp);
2641 	brelse(bp, 0);
2642 	if (error) {
2643 #if 1
2644 		printf("Failed to write RAID component info!\n");
2645 #endif
2646 	}
2647 
2648 	return(error);
2649 }
2650 
2651 void
2652 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2653 {
2654 	int c;
2655 
2656 	for (c = 0; c < raidPtr->numCol; c++) {
2657 		/* Skip dead disks. */
2658 		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2659 			continue;
2660 		/* XXXjld: what if an error occurs here? */
2661 		raidwrite_component_area(raidPtr->Disks[c].dev,
2662 		    raidPtr->raid_cinfo[c].ci_vp, map,
2663 		    RF_PARITYMAP_NBYTE,
2664 		    rf_parity_map_offset(raidPtr),
2665 		    rf_parity_map_size(raidPtr), 0);
2666 	}
2667 }
2668 
2669 void
2670 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2671 {
2672 	struct rf_paritymap_ondisk tmp;
2673 	int c,first;
2674 
2675 	first=1;
2676 	for (c = 0; c < raidPtr->numCol; c++) {
2677 		/* Skip dead disks. */
2678 		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2679 			continue;
2680 		raidread_component_area(raidPtr->Disks[c].dev,
2681 		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
2682 		    RF_PARITYMAP_NBYTE,
2683 		    rf_parity_map_offset(raidPtr),
2684 		    rf_parity_map_size(raidPtr));
2685 		if (first) {
2686 			memcpy(map, &tmp, sizeof(*map));
2687 			first = 0;
2688 		} else {
2689 			rf_paritymap_merge(map, &tmp);
2690 		}
2691 	}
2692 }
2693 
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	/* Bump the modification counter and mark every live component's
	 * label dirty; done when the set comes into use so an unclean
	 * shutdown can be detected later. */
	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	/* Now do the same for any in-use spares. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the column this spare is standing in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column references this spare,
			 * scol keeps its previous value (initially -1) and
			 * is written into the label below — verify that
			 * this cannot happen for an rf_ds_used_spare. */

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2753 
2754 
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	/* Bump the modification counter and rewrite the label on every
	 * optimal component (and every in-use spare, below).  When
	 * 'final' is RF_FINAL_COMPONENT_UPDATE and parity is known good,
	 * also mark the labels clean. */
	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the column this spare is standing in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			/* NOTE(review): if no column references this spare,
			 * scol keeps its previous value (initially -1) and
			 * is written into the label below — verify that
			 * this cannot happen for an rf_ds_used_spare. */

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2829 
2830 void
2831 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2832 {
2833 
2834 	if (vp != NULL) {
2835 		if (auto_configured == 1) {
2836 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2837 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2838 			vput(vp);
2839 
2840 		} else {
2841 			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2842 		}
2843 	}
2844 }
2845 
2846 
2847 void
2848 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2849 {
2850 	int r,c;
2851 	struct vnode *vp;
2852 	int acd;
2853 
2854 
2855 	/* We take this opportunity to close the vnodes like we should.. */
2856 
2857 	for (c = 0; c < raidPtr->numCol; c++) {
2858 		vp = raidPtr->raid_cinfo[c].ci_vp;
2859 		acd = raidPtr->Disks[c].auto_configured;
2860 		rf_close_component(raidPtr, vp, acd);
2861 		raidPtr->raid_cinfo[c].ci_vp = NULL;
2862 		raidPtr->Disks[c].auto_configured = 0;
2863 	}
2864 
2865 	for (r = 0; r < raidPtr->numSpare; r++) {
2866 		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2867 		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2868 		rf_close_component(raidPtr, vp, acd);
2869 		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2870 		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2871 	}
2872 }
2873 
2874 
2875 void
2876 rf_ReconThread(struct rf_recon_req *req)
2877 {
2878 	int     s;
2879 	RF_Raid_t *raidPtr;
2880 
2881 	s = splbio();
2882 	raidPtr = (RF_Raid_t *) req->raidPtr;
2883 	raidPtr->recon_in_progress = 1;
2884 
2885 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2886 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2887 
2888 	RF_Free(req, sizeof(*req));
2889 
2890 	raidPtr->recon_in_progress = 0;
2891 	splx(s);
2892 
2893 	/* That's all... */
2894 	kthread_exit(0);	/* does not return */
2895 }
2896 
2897 void
2898 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2899 {
2900 	int retcode;
2901 	int s;
2902 
2903 	raidPtr->parity_rewrite_stripes_done = 0;
2904 	raidPtr->parity_rewrite_in_progress = 1;
2905 	s = splbio();
2906 	retcode = rf_RewriteParity(raidPtr);
2907 	splx(s);
2908 	if (retcode) {
2909 		printf("raid%d: Error re-writing parity (%d)!\n",
2910 		    raidPtr->raidid, retcode);
2911 	} else {
2912 		/* set the clean bit!  If we shutdown correctly,
2913 		   the clean bit on each component label will get
2914 		   set */
2915 		raidPtr->parity_good = RF_RAID_CLEAN;
2916 	}
2917 	raidPtr->parity_rewrite_in_progress = 0;
2918 
2919 	/* Anyone waiting for us to stop?  If so, inform them... */
2920 	if (raidPtr->waitShutdown) {
2921 		wakeup(&raidPtr->parity_rewrite_in_progress);
2922 	}
2923 
2924 	/* That's all... */
2925 	kthread_exit(0);	/* does not return */
2926 }
2927 
2928 
2929 void
2930 rf_CopybackThread(RF_Raid_t *raidPtr)
2931 {
2932 	int s;
2933 
2934 	raidPtr->copyback_in_progress = 1;
2935 	s = splbio();
2936 	rf_CopybackReconstructedData(raidPtr);
2937 	splx(s);
2938 	raidPtr->copyback_in_progress = 0;
2939 
2940 	/* That's all... */
2941 	kthread_exit(0);	/* does not return */
2942 }
2943 
2944 
2945 void
2946 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2947 {
2948 	int s;
2949 	RF_Raid_t *raidPtr;
2950 
2951 	s = splbio();
2952 	raidPtr = req->raidPtr;
2953 	raidPtr->recon_in_progress = 1;
2954 	rf_ReconstructInPlace(raidPtr, req->col);
2955 	RF_Free(req, sizeof(*req));
2956 	raidPtr->recon_in_progress = 0;
2957 	splx(s);
2958 
2959 	/* That's all... */
2960 	kthread_exit(0);	/* does not return */
2961 }
2962 
/*
 * Try to read a component label from (dev, vp).  If it looks reasonable,
 * prepend a new RF_AutoConfig_t (which takes ownership of both the label
 * and the vnode) to ac_list and return the new list head.  Otherwise the
 * label is freed, the vnode closed, and ac_list is returned unchanged.
 * On allocation failure the whole list is torn down and NULL returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		    /* Out of memory: free every entry (and its label)
		     * accumulated so far. */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel) &&
		    (clabel->partitionSize <= size)) {
			rf_fix_old_label_size(clabel, numsecs);
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup */
		/* Label unreadable or unreasonable: release the label and
		 * the vnode; caller's list is unchanged. */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3021 
3022 RF_AutoConfig_t *
3023 rf_find_raid_components(void)
3024 {
3025 	struct vnode *vp;
3026 	struct disklabel label;
3027 	device_t dv;
3028 	deviter_t di;
3029 	dev_t dev;
3030 	int bmajor, bminor, wedge;
3031 	int error;
3032 	int i;
3033 	RF_AutoConfig_t *ac_list;
3034 	uint64_t numsecs;
3035 	unsigned secsize;
3036 
3037 	RF_ASSERT(raidPtr->bytesPerSector < rf_component_info_offset());
3038 
3039 	/* initialize the AutoConfig list */
3040 	ac_list = NULL;
3041 
3042 	/* we begin by trolling through *all* the devices on the system */
3043 
3044 	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
3045 	     dv = deviter_next(&di)) {
3046 
3047 		/* we are only interested in disks... */
3048 		if (device_class(dv) != DV_DISK)
3049 			continue;
3050 
3051 		/* we don't care about floppies... */
3052 		if (device_is_a(dv, "fd")) {
3053 			continue;
3054 		}
3055 
3056 		/* we don't care about CD's... */
3057 		if (device_is_a(dv, "cd")) {
3058 			continue;
3059 		}
3060 
3061 		/* we don't care about md's... */
3062 		if (device_is_a(dv, "md")) {
3063 			continue;
3064 		}
3065 
3066 		/* hdfd is the Atari/Hades floppy driver */
3067 		if (device_is_a(dv, "hdfd")) {
3068 			continue;
3069 		}
3070 
3071 		/* fdisa is the Atari/Milan floppy driver */
3072 		if (device_is_a(dv, "fdisa")) {
3073 			continue;
3074 		}
3075 
3076 		/* need to find the device_name_to_block_device_major stuff */
3077 		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
3078 
3079 		/* get a vnode for the raw partition of this disk */
3080 
3081 		wedge = device_is_a(dv, "dk");
3082 		bminor = minor(device_unit(dv));
3083 		dev = wedge ? makedev(bmajor, bminor) :
3084 		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
3085 		if (bdevvp(dev, &vp))
3086 			panic("RAID can't alloc vnode");
3087 
3088 		error = VOP_OPEN(vp, FREAD, NOCRED);
3089 
3090 		if (error) {
3091 			/* "Who cares."  Continue looking
3092 			   for something that exists*/
3093 			vput(vp);
3094 			continue;
3095 		}
3096 
3097 		error = getdisksize(vp, &numsecs, &secsize);
3098 		if (error) {
3099 			vput(vp);
3100 			continue;
3101 		}
3102 		if (wedge) {
3103 			struct dkwedge_info dkw;
3104 			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
3105 			    NOCRED);
3106 			if (error) {
3107 				printf("RAIDframe: can't get wedge info for "
3108 				    "dev %s (%d)\n", device_xname(dv), error);
3109 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3110 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3111 				vput(vp);
3112 				continue;
3113 			}
3114 
3115 			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
3116 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3117 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3118 				vput(vp);
3119 				continue;
3120 			}
3121 
3122 			ac_list = rf_get_component(ac_list, dev, vp,
3123 			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
3124 			continue;
3125 		}
3126 
3127 		/* Ok, the disk exists.  Go get the disklabel. */
3128 		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
3129 		if (error) {
3130 			/*
3131 			 * XXX can't happen - open() would
3132 			 * have errored out (or faked up one)
3133 			 */
3134 			if (error != ENOTTY)
3135 				printf("RAIDframe: can't get label for dev "
3136 				    "%s (%d)\n", device_xname(dv), error);
3137 		}
3138 
3139 		/* don't need this any more.  We'll allocate it again
3140 		   a little later if we really do... */
3141 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3142 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3143 		vput(vp);
3144 
3145 		if (error)
3146 			continue;
3147 
3148 		for (i = 0; i < label.d_npartitions; i++) {
3149 			char cname[sizeof(ac_list->devname)];
3150 
3151 			/* We only support partitions marked as RAID */
3152 			if (label.d_partitions[i].p_fstype != FS_RAID)
3153 				continue;
3154 
3155 			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
3156 			if (bdevvp(dev, &vp))
3157 				panic("RAID can't alloc vnode");
3158 
3159 			error = VOP_OPEN(vp, FREAD, NOCRED);
3160 			if (error) {
3161 				/* Whatever... */
3162 				vput(vp);
3163 				continue;
3164 			}
3165 			snprintf(cname, sizeof(cname), "%s%c",
3166 			    device_xname(dv), 'a' + i);
3167 			ac_list = rf_get_component(ac_list, dev, vp, cname,
3168 				label.d_partitions[i].p_size, numsecs, secsize);
3169 		}
3170 	}
3171 	deviter_release(&di);
3172 	return ac_list;
3173 }
3174 
3175 
3176 static int
3177 rf_reasonable_label(RF_ComponentLabel_t *clabel)
3178 {
3179 
3180 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3181 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3182 	    ((clabel->clean == RF_RAID_CLEAN) ||
3183 	     (clabel->clean == RF_RAID_DIRTY)) &&
3184 	    clabel->row >=0 &&
3185 	    clabel->column >= 0 &&
3186 	    clabel->num_rows > 0 &&
3187 	    clabel->num_columns > 0 &&
3188 	    clabel->row < clabel->num_rows &&
3189 	    clabel->column < clabel->num_columns &&
3190 	    clabel->blockSize > 0 &&
3191 	    clabel->numBlocks > 0) {
3192 		/* label looks reasonable enough... */
3193 		return(1);
3194 	}
3195 	return(0);
3196 }
3197 
3198 
3199 /*
3200  * For reasons yet unknown, some old component labels have garbage in
3201  * the newer numBlocksHi region, and this causes lossage.  Since those
3202  * disks will also have numsecs set to less than 32 bits of sectors,
3203  * we can determine when this corruption has occured, and fix it.
3204  */
3205 static void
3206 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3207 {
3208 
3209 	if (clabel->numBlocksHi && numsecs < ((uint64_t)1 << 32)) {
3210 		printf("WARNING: total sectors < 32 bits, yet numBlocksHi set\n"
3211 		       "WARNING: resetting numBlocksHi to zero.\n");
3212 		clabel->numBlocksHi = 0;
3213 	}
3214 }
3215 
3216 
#ifdef DEBUG
/*
 * Dump the contents of a component label to the console, for DEBUG
 * kernels only.  Purely informational; does not modify the label.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	/* reassemble the 64-bit sector count from its low/high halves */
	uint64_t numBlocks = clabel->numBlocks;

	numBlocks |= (uint64_t)clabel->numBlocksHi << 32;

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No");
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
3247 
3248 RF_ConfigSet_t *
3249 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3250 {
3251 	RF_AutoConfig_t *ac;
3252 	RF_ConfigSet_t *config_sets;
3253 	RF_ConfigSet_t *cset;
3254 	RF_AutoConfig_t *ac_next;
3255 
3256 
3257 	config_sets = NULL;
3258 
3259 	/* Go through the AutoConfig list, and figure out which components
3260 	   belong to what sets.  */
3261 	ac = ac_list;
3262 	while(ac!=NULL) {
3263 		/* we're going to putz with ac->next, so save it here
3264 		   for use at the end of the loop */
3265 		ac_next = ac->next;
3266 
3267 		if (config_sets == NULL) {
3268 			/* will need at least this one... */
3269 			config_sets = (RF_ConfigSet_t *)
3270 				malloc(sizeof(RF_ConfigSet_t),
3271 				       M_RAIDFRAME, M_NOWAIT);
3272 			if (config_sets == NULL) {
3273 				panic("rf_create_auto_sets: No memory!");
3274 			}
3275 			/* this one is easy :) */
3276 			config_sets->ac = ac;
3277 			config_sets->next = NULL;
3278 			config_sets->rootable = 0;
3279 			ac->next = NULL;
3280 		} else {
3281 			/* which set does this component fit into? */
3282 			cset = config_sets;
3283 			while(cset!=NULL) {
3284 				if (rf_does_it_fit(cset, ac)) {
3285 					/* looks like it matches... */
3286 					ac->next = cset->ac;
3287 					cset->ac = ac;
3288 					break;
3289 				}
3290 				cset = cset->next;
3291 			}
3292 			if (cset==NULL) {
3293 				/* didn't find a match above... new set..*/
3294 				cset = (RF_ConfigSet_t *)
3295 					malloc(sizeof(RF_ConfigSet_t),
3296 					       M_RAIDFRAME, M_NOWAIT);
3297 				if (cset == NULL) {
3298 					panic("rf_create_auto_sets: No memory!");
3299 				}
3300 				cset->ac = ac;
3301 				ac->next = NULL;
3302 				cset->next = config_sets;
3303 				cset->rootable = 0;
3304 				config_sets = cset;
3305 			}
3306 		}
3307 		ac = ac_next;
3308 	}
3309 
3310 
3311 	return(config_sets);
3312 }
3313 
3314 static int
3315 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3316 {
3317 	RF_ComponentLabel_t *clabel1, *clabel2;
3318 
3319 	/* If this one matches the *first* one in the set, that's good
3320 	   enough, since the other members of the set would have been
3321 	   through here too... */
3322 	/* note that we are not checking partitionSize here..
3323 
3324 	   Note that we are also not checking the mod_counters here.
3325 	   If everything else matches execpt the mod_counter, that's
3326 	   good enough for this test.  We will deal with the mod_counters
3327 	   a little later in the autoconfiguration process.
3328 
3329 	    (clabel1->mod_counter == clabel2->mod_counter) &&
3330 
3331 	   The reason we don't check for this is that failed disks
3332 	   will have lower modification counts.  If those disks are
3333 	   not added to the set they used to belong to, then they will
3334 	   form their own set, which may result in 2 different sets,
3335 	   for example, competing to be configured at raid0, and
3336 	   perhaps competing to be the root filesystem set.  If the
3337 	   wrong ones get configured, or both attempt to become /,
3338 	   weird behaviour and or serious lossage will occur.  Thus we
3339 	   need to bring them into the fold here, and kick them out at
3340 	   a later point.
3341 
3342 	*/
3343 
3344 	clabel1 = cset->ac->clabel;
3345 	clabel2 = ac->clabel;
3346 	if ((clabel1->version == clabel2->version) &&
3347 	    (clabel1->serial_number == clabel2->serial_number) &&
3348 	    (clabel1->num_rows == clabel2->num_rows) &&
3349 	    (clabel1->num_columns == clabel2->num_columns) &&
3350 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
3351 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3352 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3353 	    (clabel1->parityConfig == clabel2->parityConfig) &&
3354 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3355 	    (clabel1->blockSize == clabel2->blockSize) &&
3356 	    (clabel1->numBlocks == clabel2->numBlocks) &&
3357 	    (clabel1->numBlocksHi == clabel2->numBlocksHi) &&
3358 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
3359 	    (clabel1->root_partition == clabel2->root_partition) &&
3360 	    (clabel1->last_unit == clabel2->last_unit) &&
3361 	    (clabel1->config_order == clabel2->config_order)) {
3362 		/* if it get's here, it almost *has* to be a match */
3363 	} else {
3364 		/* it's not consistent with somebody in the set..
3365 		   punt */
3366 		return(0);
3367 	}
3368 	/* all was fine.. it must fit... */
3369 	return(1);
3370 }
3371 
/*
 * Decide whether configuration set cset has enough live, current
 * components to be worth (auto-)configuring.  Returns 1 if so,
 * 0 if too many members are missing or stale.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */
	/* The highest mod_counter seen wins; components carrying a lower
	   one are stale (e.g. previously failed) and are not counted as
	   present in the column scan below. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* For each column, look for a current component (mod_counter
	   matches) claiming that position. */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
				/* Just did an even component, and we didn't
				   bail.. reset the even_pair_failed flag,
				   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4 and 5 tolerate
	   at most one.  (RAID 1 was fully handled pair-wise above.) */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3474 
3475 void
3476 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3477 			RF_Raid_t *raidPtr)
3478 {
3479 	RF_ComponentLabel_t *clabel;
3480 	int i;
3481 
3482 	clabel = ac->clabel;
3483 
3484 	/* 1. Fill in the common stuff */
3485 	config->numRow = clabel->num_rows = 1;
3486 	config->numCol = clabel->num_columns;
3487 	config->numSpare = 0; /* XXX should this be set here? */
3488 	config->sectPerSU = clabel->sectPerSU;
3489 	config->SUsPerPU = clabel->SUsPerPU;
3490 	config->SUsPerRU = clabel->SUsPerRU;
3491 	config->parityConfig = clabel->parityConfig;
3492 	/* XXX... */
3493 	strcpy(config->diskQueueType,"fifo");
3494 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3495 	config->layoutSpecificSize = 0; /* XXX ?? */
3496 
3497 	while(ac!=NULL) {
3498 		/* row/col values will be in range due to the checks
3499 		   in reasonable_label() */
3500 		strcpy(config->devnames[0][ac->clabel->column],
3501 		       ac->devname);
3502 		ac = ac->next;
3503 	}
3504 
3505 	for(i=0;i<RF_MAXDBGV;i++) {
3506 		config->debugVars[i][0] = 0;
3507 	}
3508 }
3509 
3510 int
3511 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3512 {
3513 	RF_ComponentLabel_t *clabel;
3514 	int column;
3515 	int sparecol;
3516 
3517 	raidPtr->autoconfigure = new_value;
3518 
3519 	for(column=0; column<raidPtr->numCol; column++) {
3520 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
3521 			clabel = raidget_component_label(raidPtr, column);
3522 			clabel->autoconfigure = new_value;
3523 			raidflush_component_label(raidPtr, column);
3524 		}
3525 	}
3526 	for(column = 0; column < raidPtr->numSpare ; column++) {
3527 		sparecol = raidPtr->numCol + column;
3528 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3529 			clabel = raidget_component_label(raidPtr, sparecol);
3530 			clabel->autoconfigure = new_value;
3531 			raidflush_component_label(raidPtr, sparecol);
3532 		}
3533 	}
3534 	return(new_value);
3535 }
3536 
3537 int
3538 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3539 {
3540 	RF_ComponentLabel_t *clabel;
3541 	int column;
3542 	int sparecol;
3543 
3544 	raidPtr->root_partition = new_value;
3545 	for(column=0; column<raidPtr->numCol; column++) {
3546 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
3547 			clabel = raidget_component_label(raidPtr, column);
3548 			clabel->root_partition = new_value;
3549 			raidflush_component_label(raidPtr, column);
3550 		}
3551 	}
3552 	for(column = 0; column < raidPtr->numSpare ; column++) {
3553 		sparecol = raidPtr->numCol + column;
3554 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3555 			clabel = raidget_component_label(raidPtr, sparecol);
3556 			clabel->root_partition = new_value;
3557 			raidflush_component_label(raidPtr, sparecol);
3558 		}
3559 	}
3560 	return(new_value);
3561 }
3562 
3563 void
3564 rf_release_all_vps(RF_ConfigSet_t *cset)
3565 {
3566 	RF_AutoConfig_t *ac;
3567 
3568 	ac = cset->ac;
3569 	while(ac!=NULL) {
3570 		/* Close the vp, and give it back */
3571 		if (ac->vp) {
3572 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3573 			VOP_CLOSE(ac->vp, FREAD, NOCRED);
3574 			vput(ac->vp);
3575 			ac->vp = NULL;
3576 		}
3577 		ac = ac->next;
3578 	}
3579 }
3580 
3581 
3582 void
3583 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3584 {
3585 	RF_AutoConfig_t *ac;
3586 	RF_AutoConfig_t *next_ac;
3587 
3588 	ac = cset->ac;
3589 	while(ac!=NULL) {
3590 		next_ac = ac->next;
3591 		/* nuke the label */
3592 		free(ac->clabel, M_RAIDFRAME);
3593 		/* cleanup the config structure */
3594 		free(ac, M_RAIDFRAME);
3595 		/* "next.." */
3596 		ac = next_ac;
3597 	}
3598 	/* and, finally, nuke the config set */
3599 	free(cset, M_RAIDFRAME);
3600 }
3601 
3602 
/*
 * Initialize a component label from the current state of the RAID set
 * described by raidPtr.  The caller supplies the clabel storage; only
 * set-wide fields are filled in here (per-component fields such as
 * row/column are the caller's job).
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	/* 64-bit sector count is split across numBlocks (low 32 bits)
	   and numBlocksHi (upper 32 bits) */
	clabel->numBlocks = raidPtr->sectorsPerDisk;
	clabel->numBlocksHi = raidPtr->sectorsPerDisk >> 32;

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3636 
3637 int
3638 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3639 {
3640 	RF_Raid_t *raidPtr;
3641 	RF_Config_t *config;
3642 	int raidID;
3643 	int retcode;
3644 
3645 #ifdef DEBUG
3646 	printf("RAID autoconfigure\n");
3647 #endif
3648 
3649 	retcode = 0;
3650 	*unit = -1;
3651 
3652 	/* 1. Create a config structure */
3653 
3654 	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3655 				       M_RAIDFRAME,
3656 				       M_NOWAIT);
3657 	if (config==NULL) {
3658 		printf("Out of mem!?!?\n");
3659 				/* XXX do something more intelligent here. */
3660 		return(1);
3661 	}
3662 
3663 	memset(config, 0, sizeof(RF_Config_t));
3664 
3665 	/*
3666 	   2. Figure out what RAID ID this one is supposed to live at
3667 	   See if we can get the same RAID dev that it was configured
3668 	   on last time..
3669 	*/
3670 
3671 	raidID = cset->ac->clabel->last_unit;
3672 	if ((raidID < 0) || (raidID >= numraid)) {
3673 		/* let's not wander off into lala land. */
3674 		raidID = numraid - 1;
3675 	}
3676 	if (raidPtrs[raidID]->valid != 0) {
3677 
3678 		/*
3679 		   Nope... Go looking for an alternative...
3680 		   Start high so we don't immediately use raid0 if that's
3681 		   not taken.
3682 		*/
3683 
3684 		for(raidID = numraid - 1; raidID >= 0; raidID--) {
3685 			if (raidPtrs[raidID]->valid == 0) {
3686 				/* can use this one! */
3687 				break;
3688 			}
3689 		}
3690 	}
3691 
3692 	if (raidID < 0) {
3693 		/* punt... */
3694 		printf("Unable to auto configure this set!\n");
3695 		printf("(Out of RAID devs!)\n");
3696 		free(config, M_RAIDFRAME);
3697 		return(1);
3698 	}
3699 
3700 #ifdef DEBUG
3701 	printf("Configuring raid%d:\n",raidID);
3702 #endif
3703 
3704 	raidPtr = raidPtrs[raidID];
3705 
3706 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
3707 	raidPtr->raidid = raidID;
3708 	raidPtr->openings = RAIDOUTSTANDING;
3709 
3710 	/* 3. Build the configuration structure */
3711 	rf_create_configuration(cset->ac, config, raidPtr);
3712 
3713 	/* 4. Do the configuration */
3714 	retcode = rf_Configure(raidPtr, config, cset->ac);
3715 
3716 	if (retcode == 0) {
3717 
3718 		raidinit(raidPtrs[raidID]);
3719 
3720 		rf_markalldirty(raidPtrs[raidID]);
3721 		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
3722 		if (cset->ac->clabel->root_partition==1) {
3723 			/* everything configured just fine.  Make a note
3724 			   that this set is eligible to be root. */
3725 			cset->rootable = 1;
3726 			/* XXX do this here? */
3727 			raidPtrs[raidID]->root_partition = 1;
3728 		}
3729 	}
3730 
3731 	/* 5. Cleanup */
3732 	free(config, M_RAIDFRAME);
3733 
3734 	*unit = raidID;
3735 	return(retcode);
3736 }
3737 
3738 void
3739 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3740 {
3741 	struct buf *bp;
3742 
3743 	bp = (struct buf *)desc->bp;
3744 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3745 	    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3746 }
3747 
/*
 * Initialize one of RAIDframe's memory pools: create it at IPL_BIO
 * (these pools are drawn on from the block-I/O path), cap it at xmax
 * items, and pre-allocate / maintain at least xmin items.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);	/* upper bound on idle items kept */
	pool_prime(p, xmin);	/* pre-allocate the minimum up front */
	pool_setlowat(p, xmin);	/* keep at least this many on hand */
}
3757 
3758 /*
3759  * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3760  * if there is IO pending and if that IO could possibly be done for a
3761  * given RAID set.  Returns 0 if IO is waiting and can be done, 1
3762  * otherwise.
3763  *
3764  */
3765 
3766 int
3767 rf_buf_queue_check(int raidid)
3768 {
3769 	if ((bufq_peek(raid_softc[raidid].buf_queue) != NULL) &&
3770 	    raidPtrs[raidid]->openings > 0) {
3771 		/* there is work to do */
3772 		return 0;
3773 	}
3774 	/* default is nothing to do */
3775 	return 1;
3776 }
3777 
3778 int
3779 rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
3780 {
3781 	uint64_t numsecs;
3782 	unsigned secsize;
3783 	int error;
3784 
3785 	error = getdisksize(vp, &numsecs, &secsize);
3786 	if (error == 0) {
3787 		diskPtr->blockSize = secsize;
3788 		diskPtr->numBlocks = numsecs - rf_protectedSectors;
3789 		diskPtr->partitionSize = numsecs;
3790 		return 0;
3791 	}
3792 	return error;
3793 }
3794 
/*
 * Autoconf match function: raid pseudo-devices always match.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3800 
/*
 * Autoconf attach function: intentionally empty -- all real setup
 * happens when the raid unit is configured, not at attach time.
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3806 
3807 
3808 static int
3809 raid_detach(device_t self, int flags)
3810 {
3811 	int error;
3812 	struct raid_softc *rs = &raid_softc[device_unit(self)];
3813 
3814 	if ((error = raidlock(rs)) != 0)
3815 		return (error);
3816 
3817 	error = raid_detach_unlocked(rs);
3818 
3819 	raidunlock(rs);
3820 
3821 	return error;
3822 }
3823 
/*
 * Publish a synthetic disk geometry for the raid unit via the device
 * property dictionary, so userland (e.g. disklabel) sees plausible
 * sectors/tracks/cylinders.  The track/cylinder numbers are invented
 * from the stripe geometry, not real hardware values.
 */
static void
rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	prop_dictionary_t disk_info, odisk_info, geom;
	disk_info = prop_dictionary_create();
	geom = prop_dictionary_create();
	prop_dictionary_set_uint64(geom, "sectors-per-unit",
				   raidPtr->totalSectors);
	prop_dictionary_set_uint32(geom, "sector-size",
				   raidPtr->bytesPerSector);

	/* fake a "track" as one data stripe... */
	prop_dictionary_set_uint16(geom, "sectors-per-track",
				   raidPtr->Layout.dataSectorsPerStripe);
	/* ...and a "cylinder" as 4 tracks per column */
	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
				   4 * raidPtr->numCol);

	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
	   raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
	   (4 * raidPtr->numCol)));

	/* geom is retained by disk_info; drop our reference */
	prop_dictionary_set(disk_info, "geometry", geom);
	prop_object_release(geom);
	prop_dictionary_set(device_properties(rs->sc_dev),
			    "disk-info", disk_info);
	/* swap in the new dictionary and release the old one, if any */
	odisk_info = rs->sc_dkdev.dk_info;
	rs->sc_dkdev.dk_info = disk_info;
	if (odisk_info)
		prop_object_release(odisk_info);
}
3853 
3854 /*
3855  * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3856  * We end up returning whatever error was returned by the first cache flush
3857  * that fails.
3858  */
3859 
3860 int
3861 rf_sync_component_caches(RF_Raid_t *raidPtr)
3862 {
3863 	int c, sparecol;
3864 	int e,error;
3865 	int force = 1;
3866 
3867 	error = 0;
3868 	for (c = 0; c < raidPtr->numCol; c++) {
3869 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
3870 			e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3871 					  &force, FWRITE, NOCRED);
3872 			if (e) {
3873 				if (e != ENODEV)
3874 					printf("raid%d: cache flush to component %s failed.\n",
3875 					       raidPtr->raidid, raidPtr->Disks[c].devname);
3876 				if (error == 0) {
3877 					error = e;
3878 				}
3879 			}
3880 		}
3881 	}
3882 
3883 	for( c = 0; c < raidPtr->numSpare ; c++) {
3884 		sparecol = raidPtr->numCol + c;
3885 		/* Need to ensure that the reconstruct actually completed! */
3886 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3887 			e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3888 					  DIOCCACHESYNC, &force, FWRITE, NOCRED);
3889 			if (e) {
3890 				if (e != ENODEV)
3891 					printf("raid%d: cache flush to component %s failed.\n",
3892 					       raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3893 				if (error == 0) {
3894 					error = e;
3895 				}
3896 			}
3897 		}
3898 	}
3899 	return error;
3900 }
3901