xref: /netbsd-src/sys/dev/raidframe/rf_netbsdkintf.c (revision a4ddc2c8fb9af816efe3b1c375a5530aef0e89e9)
1 /*	$NetBSD: rf_netbsdkintf.c,v 1.299 2013/02/18 19:42:54 oster Exp $	*/
2 
3 /*-
4  * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Greg Oster; Jason R. Thorpe.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1988 University of Utah.
34  * Copyright (c) 1990, 1993
35  *      The Regents of the University of California.  All rights reserved.
36  *
37  * This code is derived from software contributed to Berkeley by
38  * the Systems Programming Group of the University of Utah Computer
39  * Science Department.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  * from: Utah $Hdr: cd.c 1.6 90/11/28$
66  *
67  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
68  */
69 
70 /*
71  * Copyright (c) 1995 Carnegie-Mellon University.
72  * All rights reserved.
73  *
74  * Authors: Mark Holland, Jim Zelenka
75  *
76  * Permission to use, copy, modify and distribute this software and
77  * its documentation is hereby granted, provided that both the copyright
78  * notice and this permission notice appear in all copies of the
79  * software, derivative works or modified versions, and any portions
80  * thereof, and that both notices appear in supporting documentation.
81  *
82  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85  *
86  * Carnegie Mellon requests users of this software to return to
87  *
88  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
89  *  School of Computer Science
90  *  Carnegie Mellon University
91  *  Pittsburgh PA 15213-3890
92  *
93  * any improvements or extensions that they make and grant Carnegie the
94  * rights to redistribute these changes.
95  */
96 
97 /***********************************************************
98  *
99  * rf_kintf.c -- the kernel interface routines for RAIDframe
100  *
101  ***********************************************************/
102 
103 #include <sys/cdefs.h>
104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.299 2013/02/18 19:42:54 oster Exp $");
105 
106 #ifdef _KERNEL_OPT
107 #include "opt_compat_netbsd.h"
108 #include "opt_raid_autoconfig.h"
109 #include "raid.h"
110 #endif
111 
112 #include <sys/param.h>
113 #include <sys/errno.h>
114 #include <sys/pool.h>
115 #include <sys/proc.h>
116 #include <sys/queue.h>
117 #include <sys/disk.h>
118 #include <sys/device.h>
119 #include <sys/stat.h>
120 #include <sys/ioctl.h>
121 #include <sys/fcntl.h>
122 #include <sys/systm.h>
123 #include <sys/vnode.h>
124 #include <sys/disklabel.h>
125 #include <sys/conf.h>
126 #include <sys/buf.h>
127 #include <sys/bufq.h>
128 #include <sys/reboot.h>
129 #include <sys/kauth.h>
130 
131 #include <prop/proplib.h>
132 
133 #include <dev/raidframe/raidframevar.h>
134 #include <dev/raidframe/raidframeio.h>
135 #include <dev/raidframe/rf_paritymap.h>
136 
137 #include "rf_raid.h"
138 #include "rf_copyback.h"
139 #include "rf_dag.h"
140 #include "rf_dagflags.h"
141 #include "rf_desc.h"
142 #include "rf_diskqueue.h"
143 #include "rf_etimer.h"
144 #include "rf_general.h"
145 #include "rf_kintf.h"
146 #include "rf_options.h"
147 #include "rf_driver.h"
148 #include "rf_parityscan.h"
149 #include "rf_threadstuff.h"
150 
151 #ifdef COMPAT_50
152 #include "rf_compat50.h"
153 #endif
154 
155 #ifdef DEBUG
156 int     rf_kdebug_level = 0;
157 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
158 #else				/* DEBUG */
159 #define db1_printf(a) { }
160 #endif				/* DEBUG */
161 
162 static RF_Raid_t **raidPtrs;	/* global raid device descriptors */
163 
164 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
165 static rf_declare_mutex2(rf_sparet_wait_mutex);
166 static rf_declare_cond2(rf_sparet_wait_cv);
167 static rf_declare_cond2(rf_sparet_resp_cv);
168 
169 static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
170 						 * spare table */
171 static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
172 						 * installation process */
173 #endif
174 
175 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
176 
177 /* prototypes */
178 static void KernelWakeupFunc(struct buf *);
179 static void InitBP(struct buf *, struct vnode *, unsigned,
180     dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
181     void *, int, struct proc *);
182 static void raidinit(RF_Raid_t *);
183 
184 void raidattach(int);
185 static int raid_match(device_t, cfdata_t, void *);
186 static void raid_attach(device_t, device_t, void *);
187 static int raid_detach(device_t, int);
188 
189 static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
190     daddr_t, daddr_t);
191 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
192     daddr_t, daddr_t, int);
193 
194 static int raidwrite_component_label(unsigned,
195     dev_t, struct vnode *, RF_ComponentLabel_t *);
196 static int raidread_component_label(unsigned,
197     dev_t, struct vnode *, RF_ComponentLabel_t *);
198 
199 
200 dev_type_open(raidopen);
201 dev_type_close(raidclose);
202 dev_type_read(raidread);
203 dev_type_write(raidwrite);
204 dev_type_ioctl(raidioctl);
205 dev_type_strategy(raidstrategy);
206 dev_type_dump(raiddump);
207 dev_type_size(raidsize);
208 
209 const struct bdevsw raid_bdevsw = {
210 	raidopen, raidclose, raidstrategy, raidioctl,
211 	raiddump, raidsize, D_DISK
212 };
213 
214 const struct cdevsw raid_cdevsw = {
215 	raidopen, raidclose, raidread, raidwrite, raidioctl,
216 	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
217 };
218 
219 static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
220 
221 /* XXX Not sure if the following should be replacing the raidPtrs above,
222    or if it should be used in conjunction with that...
223 */
224 
/*
 * Per-unit driver state ("softc") for a RAID pseudo-device.  One entry
 * per configured unit lives in the global raid_softc[] array; the
 * RAIDframe core state itself is kept separately in raidPtrs[].
 */
225 struct raid_softc {
226 	device_t sc_dev;
227 	int     sc_flags;	/* flags */
228 	int     sc_cflags;	/* configuration flags */
229 	uint64_t sc_size;	/* size of the raid device */
230 	char    sc_xname[20];	/* XXX external name */
231 	struct disk sc_dkdev;	/* generic disk device info */
232 	struct bufq_state *buf_queue;	/* used for the device queue */
233 };
/* sc_flags bits -- protected by the per-unit raidlock()/raidunlock() */
234 /* sc_flags */
235 #define RAIDF_INITED	0x01	/* unit has been initialized */
236 #define RAIDF_WLABEL	0x02	/* label area is writable */
237 #define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
238 #define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
239 #define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
240 #define RAIDF_LOCKED	0x80	/* unit is locked */
241 
242 #define	raidunit(x)	DISKUNIT(x)
243 int numraid = 0;
244 
245 extern struct cfdriver raid_cd;
246 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
247     raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
248     DVF_DETACH_SHUTDOWN);
249 
250 /*
251  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
252  * Be aware that large numbers can allow the driver to consume a lot of
253  * kernel memory, especially on writes, and in degraded mode reads.
254  *
255  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
256  * a single 64K write will typically require 64K for the old data,
257  * 64K for the old parity, and 64K for the new parity, for a total
258  * of 192K (if the parity buffer is not re-used immediately).
259  * Even if it is used immediately, that's still 128K, which when multiplied
260  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
261  *
262  * Now in degraded mode, for example, a 64K read on the above setup may
263  * require data reconstruction, which will require *all* of the 4 remaining
264  * disks to participate -- 4 * 32K/disk == 128K again.
265  */
266 
267 #ifndef RAIDOUTSTANDING
268 #define RAIDOUTSTANDING   6
269 #endif
270 
271 #define RAIDLABELDEV(dev)	\
272 	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
273 
274 /* declared here, and made public, for the benefit of KVM stuff.. */
275 struct raid_softc *raid_softc;
276 
277 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
278 				     struct disklabel *);
279 static void raidgetdisklabel(dev_t);
280 static void raidmakedisklabel(struct raid_softc *);
281 
282 static int raidlock(struct raid_softc *);
283 static void raidunlock(struct raid_softc *);
284 
285 static int raid_detach_unlocked(struct raid_softc *);
286 
287 static void rf_markalldirty(RF_Raid_t *);
288 static void rf_set_properties(struct raid_softc *, RF_Raid_t *);
289 
290 void rf_ReconThread(struct rf_recon_req *);
291 void rf_RewriteParityThread(RF_Raid_t *raidPtr);
292 void rf_CopybackThread(RF_Raid_t *raidPtr);
293 void rf_ReconstructInPlaceThread(struct rf_recon_req *);
294 int rf_autoconfig(device_t);
295 void rf_buildroothack(RF_ConfigSet_t *);
296 
297 RF_AutoConfig_t *rf_find_raid_components(void);
298 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
299 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
300 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
301 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
302 int rf_set_autoconfig(RF_Raid_t *, int);
303 int rf_set_rootpartition(RF_Raid_t *, int);
304 void rf_release_all_vps(RF_ConfigSet_t *);
305 void rf_cleanup_config_set(RF_ConfigSet_t *);
306 int rf_have_enough_components(RF_ConfigSet_t *);
307 int rf_auto_config_set(RF_ConfigSet_t *, int *);
308 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
309 
310 /*
311  * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
312  * Note that this is overridden by having RAID_AUTOCONFIG as an option
313  * in the kernel config file.
314  */
315 #ifdef RAID_AUTOCONFIG
316 int raidautoconfig = 1;
317 #else
318 int raidautoconfig = 0;
319 #endif
320 static bool raidautoconfigdone = false;
321 
322 struct RF_Pools_s rf_pools;
323 
/*
 * raidattach -- pseudo-device attach routine, called once at boot with
 * the number of units requested ("pseudo-device raid N").  Allocates
 * the global raidPtrs[] and raid_softc[] arrays, boots the RAIDframe
 * core, hooks up the autoconf attachment, and registers a finalizer so
 * auto-configured sets are assembled after real hardware is found.
 * On allocation failure it trims numraid and bails out early.
 */
324 void
325 raidattach(int num)
326 {
327 	int raidID;
328 	int i, rc;
329 
330 	aprint_debug("raidattach: Asked for %d units\n", num);
331 
332 	if (num <= 0) {
333 #ifdef DIAGNOSTIC
334 		panic("raidattach: count <= 0");
335 #endif
336 		return;
337 	}
338 	/* This is where all the initialization stuff gets done. */
339 
340 	numraid = num;
341 
342 	/* Make some space for requested number of units... */
343 
344 	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
345 	if (raidPtrs == NULL) {
346 		panic("raidPtrs is NULL!!");
347 	}
348 
349 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
350 	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
351 	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
352 	rf_init_cond2(rf_sparet_resp_cv, "rfgst");
353 
354 	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
355 #endif
356 
357 	for (i = 0; i < num; i++)
358 		raidPtrs[i] = NULL;
359 	rc = rf_BootRaidframe();
360 	if (rc == 0)
361 		aprint_verbose("Kernelized RAIDframe activated\n");
362 	else
363 		panic("Serious error booting RAID!!");
364 
365 	/* put together some datastructures like the CCD device does.. This
366 	 * lets us lock the device and what-not when it gets opened. */
367 
368 	raid_softc = (struct raid_softc *)
369 		malloc(num * sizeof(struct raid_softc),
370 		       M_RAIDFRAME, M_NOWAIT);
371 	if (raid_softc == NULL) {
372 		aprint_error("WARNING: no memory for RAIDframe driver\n");
373 		return;
374 	}
375 
376 	memset(raid_softc, 0, num * sizeof(struct raid_softc));
377 
378 	for (raidID = 0; raidID < num; raidID++) {
379 		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK);
380 
381 		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
382 			  (RF_Raid_t *));
383 		if (raidPtrs[raidID] == NULL) {
384 			aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID);
			/* Shrink the unit count to what was fully set up. */
385 			numraid = raidID;
386 			return;
387 		}
388 	}
389 
390 	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
391 		aprint_error("raidattach: config_cfattach_attach failed?\n");
392 	}
393 
394 	raidautoconfigdone = false;
395 
396 	/*
397 	 * Register a finalizer which will be used to auto-config RAID
398 	 * sets once all real hardware devices have been found.
399 	 */
400 	if (config_finalize_register(NULL, rf_autoconfig) != 0)
401 		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
402 }
403 
404 int
405 rf_autoconfig(device_t self)
406 {
407 	RF_AutoConfig_t *ac_list;
408 	RF_ConfigSet_t *config_sets;
409 
410 	if (!raidautoconfig || raidautoconfigdone == true)
411 		return (0);
412 
413 	/* XXX This code can only be run once. */
414 	raidautoconfigdone = true;
415 
416 	/* 1. locate all RAID components on the system */
417 	aprint_debug("Searching for RAID components...\n");
418 	ac_list = rf_find_raid_components();
419 
420 	/* 2. Sort them into their respective sets. */
421 	config_sets = rf_create_auto_sets(ac_list);
422 
423 	/*
424 	 * 3. Evaluate each set and configure the valid ones.
425 	 * This gets done in rf_buildroothack().
426 	 */
427 	rf_buildroothack(config_sets);
428 
429 	return 1;
430 }
431 
/*
 * rf_buildroothack -- configure every eligible autoconfig set, then
 * try to determine whether one of the resulting RAID sets should be
 * the root device.
 *
 * Pass 1 walks the config-set list: sets with enough components and
 * autoconfigure==1 are brought up via rf_auto_config_set(); all others
 * have their vnode resources released.  Every set is cleaned up.
 *
 * Root selection: if exactly one configured set is rootable it becomes
 * booted_device (or its wedge).  If several are rootable, we let the
 * MD code pick a boot device and then look for the single RAID set
 * containing that component; failing that, RB_ASKNAME is set so the
 * user is prompted.  An explicit rootspec overrides everything.
 */
432 void
433 rf_buildroothack(RF_ConfigSet_t *config_sets)
434 {
435 	RF_ConfigSet_t *cset;
436 	RF_ConfigSet_t *next_cset;
437 	int retcode;
438 	int raidID;
439 	int rootID;
440 	int col;
441 	int num_root;
442 	char *devname;
443 
444 	rootID = 0;
445 	num_root = 0;
446 	cset = config_sets;
447 	while (cset != NULL) {
448 		next_cset = cset->next;
449 		if (rf_have_enough_components(cset) &&
450 		    cset->ac->clabel->autoconfigure==1) {
451 			retcode = rf_auto_config_set(cset,&raidID);
452 			if (!retcode) {
453 				aprint_debug("raid%d: configured ok\n", raidID);
454 				if (cset->rootable) {
455 					rootID = raidID;
456 					num_root++;
457 				}
458 			} else {
459 				/* The autoconfig didn't work :( */
460 				aprint_debug("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
461 				rf_release_all_vps(cset);
462 			}
463 		} else {
464 			/* we're not autoconfiguring this set...
465 			   release the associated resources */
466 			rf_release_all_vps(cset);
467 		}
468 		/* cleanup */
469 		rf_cleanup_config_set(cset);
470 		cset = next_cset;
471 	}
472 
473 	/* if the user has specified what the root device should be
474 	   then we don't touch booted_device or boothowto... */
475 
476 	if (rootspec != NULL)
477 		return;
478 
479 	/* we found something bootable... */
480 
481 	if (num_root == 1) {
482 		if (raid_softc[rootID].sc_dkdev.dk_nwedges != 0) {
483 			/* XXX: How do we find the real root partition? */
484 			char cname[sizeof(cset->ac->devname)];
485 			snprintf(cname, sizeof(cname), "%s%c",
486 			    device_xname(raid_softc[rootID].sc_dev), 'a');
487 			booted_device = dkwedge_find_by_wname(cname);
488 		} else
489 			booted_device = raid_softc[rootID].sc_dev;
490 	} else if (num_root > 1) {
491 
492 		/*
493 		 * Maybe the MD code can help. If it cannot, then
494 		 * setroot() will discover that we have no
495 		 * booted_device and will ask the user if nothing was
496 		 * hardwired in the kernel config file
497 		 */
498 
499 		if (booted_device == NULL)
500 			cpu_rootconf();
501 		if (booted_device == NULL)
502 			return;
503 
504 		num_root = 0;
505 		for (raidID = 0; raidID < numraid; raidID++) {
506 			if (raidPtrs[raidID]->valid == 0)
507 				continue;
508 
509 			if (raidPtrs[raidID]->root_partition == 0)
510 				continue;
511 
			/* Does this set contain the booted component? */
512 			for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
513 				devname = raidPtrs[raidID]->Disks[col].devname;
				/* Strip the "/dev/" prefix before comparing. */
514 				devname += sizeof("/dev/") - 1;
515 				if (strncmp(devname, device_xname(booted_device),
516 					    strlen(device_xname(booted_device))) != 0)
517 					continue;
518 				aprint_debug("raid%d includes boot device %s\n",
519 				       raidID, devname);
520 				num_root++;
521 				rootID = raidID;
522 			}
523 		}
524 
525 		if (num_root == 1) {
526 			booted_device = raid_softc[rootID].sc_dev;
527 		} else {
528 			/* we can't guess.. require the user to answer... */
529 			boothowto |= RB_ASKNAME;
530 		}
531 	}
532 }
533 
534 
535 int
536 raidsize(dev_t dev)
537 {
538 	struct raid_softc *rs;
539 	struct disklabel *lp;
540 	int     part, unit, omask, size;
541 
542 	unit = raidunit(dev);
543 	if (unit >= numraid)
544 		return (-1);
545 	rs = &raid_softc[unit];
546 
547 	if ((rs->sc_flags & RAIDF_INITED) == 0)
548 		return (-1);
549 
550 	part = DISKPART(dev);
551 	omask = rs->sc_dkdev.dk_openmask & (1 << part);
552 	lp = rs->sc_dkdev.dk_label;
553 
554 	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
555 		return (-1);
556 
557 	if (lp->d_partitions[part].p_fstype != FS_SWAP)
558 		size = -1;
559 	else
560 		size = lp->d_partitions[part].p_size *
561 		    (lp->d_secsize / DEV_BSIZE);
562 
563 	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
564 		return (-1);
565 
566 	return (size);
567 
568 }
569 
/*
 * raiddump -- crash-dump entry point.  Only RAID 1 sets (one data
 * column, one parity column) are supported.  Picks a live component
 * to receive the dump -- preferring the master, then a spare standing
 * in for the master, then the slave, then a spare for the slave --
 * and forwards the dump to that component's block device d_dump.
 */
570 int
571 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
572 {
573 	int     unit = raidunit(dev);
574 	struct raid_softc *rs;
575 	const struct bdevsw *bdev;
576 	struct disklabel *lp;
577 	RF_Raid_t *raidPtr;
578 	daddr_t offset;
579 	int     part, c, sparecol, j, scol, dumpto;
580 	int     error = 0;
581 
582 	if (unit >= numraid)
583 		return (ENXIO);
584 
585 	rs = &raid_softc[unit];
586 	raidPtr = raidPtrs[unit];
587 
588 	if ((rs->sc_flags & RAIDF_INITED) == 0)
589 		return ENXIO;
590 
591 	/* we only support dumping to RAID 1 sets */
592 	if (raidPtr->Layout.numDataCol != 1 ||
593 	    raidPtr->Layout.numParityCol != 1)
594 		return EINVAL;
595 
596 
597 	if ((error = raidlock(rs)) != 0)
598 		return error;
599 
	/* The dump must be a whole number of DEV_BSIZE blocks... */
600 	if (size % DEV_BSIZE != 0) {
601 		error = EINVAL;
602 		goto out;
603 	}
604 
	/* ...and must fit entirely within the RAID device. */
605 	if (blkno + size / DEV_BSIZE > rs->sc_size) {
606 		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
607 		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
608 		    size / DEV_BSIZE, rs->sc_size);
609 		error = EINVAL;
610 		goto out;
611 	}
612 
613 	part = DISKPART(dev);
614 	lp = rs->sc_dkdev.dk_label;
615 	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;
616 
617 	/* figure out what device is alive.. */
618 
619 	/*
620 	   Look for a component to dump to.  The preference for the
621 	   component to dump to is as follows:
622 	   1) the master
623 	   2) a used_spare of the master
624 	   3) the slave
625 	   4) a used_spare of the slave
626 	*/
627 
628 	dumpto = -1;
629 	for (c = 0; c < raidPtr->numCol; c++) {
630 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
631 			/* this might be the one */
632 			dumpto = c;
633 			break;
634 		}
635 	}
636 
637 	/*
638 	   At this point we have possibly selected a live master or a
639 	   live slave.  We now check to see if there is a spared
640 	   master (or a spared slave), if we didn't find a live master
641 	   or a live slave.
642 	*/
643 
644 	for (c = 0; c < raidPtr->numSpare; c++) {
645 		sparecol = raidPtr->numCol + c;
646 		if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
647 			/* How about this one? */
648 			scol = -1;
			/* Find which column this spare is replacing. */
649 			for(j=0;j<raidPtr->numCol;j++) {
650 				if (raidPtr->Disks[j].spareCol == sparecol) {
651 					scol = j;
652 					break;
653 				}
654 			}
655 			if (scol == 0) {
656 				/*
657 				   We must have found a spared master!
658 				   We'll take that over anything else
659 				   found so far.  (We couldn't have
660 				   found a real master before, since
661 				   this is a used spare, and it's
662 				   saying that it's replacing the
663 				   master.)  On reboot (with
664 				   autoconfiguration turned on)
665 				   sparecol will become the 1st
666 				   component (component0) of this set.
667 				*/
668 				dumpto = sparecol;
669 				break;
670 			} else if (scol != -1) {
671 				/*
672 				   Must be a spared slave.  We'll dump
673 				   to that if we haven't found anything
674 				   else so far.
675 				*/
676 				if (dumpto == -1)
677 					dumpto = sparecol;
678 			}
679 		}
680 	}
681 
682 	if (dumpto == -1) {
683 		/* we couldn't find any live components to dump to!?!?
684 		 */
685 		error = EINVAL;
686 		goto out;
687 	}
688 
689 	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
690 
691 	/*
692 	   Note that blkno is relative to this particular partition.
693 	   By adding the offset of this partition in the RAID
694 	   set, and also adding RF_PROTECTED_SECTORS, we get a
695 	   value that is relative to the partition used for the
696 	   underlying component.
697 	*/
698 
699 	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
700 				blkno + offset, va, size);
701 
702 out:
703 	raidunlock(rs);
704 
705 	return error;
706 }
/*
 * raidopen -- open entry point.  Takes the per-unit lock, refuses
 * opens on units being shut down, fails non-raw-partition opens when
 * wedges exist, (re)reads the disklabel on the first open of an
 * initialized unit, validates the partition, records the open in the
 * char/block open masks, and marks components dirty on first open.
 */
707 /* ARGSUSED */
708 int
709 raidopen(dev_t dev, int flags, int fmt,
710     struct lwp *l)
711 {
712 	int     unit = raidunit(dev);
713 	struct raid_softc *rs;
714 	struct disklabel *lp;
715 	int     part, pmask;
716 	int     error = 0;
717 
718 	if (unit >= numraid)
719 		return (ENXIO);
720 	rs = &raid_softc[unit];
721 
722 	if ((error = raidlock(rs)) != 0)
723 		return (error);
724 
725 	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
726 		error = EBUSY;
727 		goto bad;
728 	}
729 
730 	lp = rs->sc_dkdev.dk_label;
731 
732 	part = DISKPART(dev);
733 
734 	/*
735 	 * If there are wedges, and this is not RAW_PART, then we
736 	 * need to fail.
737 	 */
738 	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
739 		error = EBUSY;
740 		goto bad;
741 	}
742 	pmask = (1 << part);
743 
	/* First open of an initialized unit: refresh the disklabel. */
744 	if ((rs->sc_flags & RAIDF_INITED) &&
745 	    (rs->sc_dkdev.dk_openmask == 0))
746 		raidgetdisklabel(dev);
747 
748 	/* make sure that this partition exists */
749 
750 	if (part != RAW_PART) {
751 		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
752 		    ((part >= lp->d_npartitions) ||
753 			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
754 			error = ENXIO;
755 			goto bad;
756 		}
757 	}
758 	/* Prevent this unit from being unconfigured while open. */
759 	switch (fmt) {
760 	case S_IFCHR:
761 		rs->sc_dkdev.dk_copenmask |= pmask;
762 		break;
763 
764 	case S_IFBLK:
765 		rs->sc_dkdev.dk_bopenmask |= pmask;
766 		break;
767 	}
768 
769 	if ((rs->sc_dkdev.dk_openmask == 0) &&
770 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
771 		/* First one... mark things as dirty... Note that we *MUST*
772 		 have done a configure before this.  I DO NOT WANT TO BE
773 		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
774 		 THAT THEY BELONG TOGETHER!!!!! */
775 		/* XXX should check to see if we're only open for reading
776 		   here... If so, we needn't do this, but then need some
777 		   other way of keeping track of what's happened.. */
778 
779 		rf_markalldirty(raidPtrs[unit]);
780 	}
781 
782 
	/* Combine the char and block masks into the overall open mask. */
783 	rs->sc_dkdev.dk_openmask =
784 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
785 
786 bad:
787 	raidunlock(rs);
788 
789 	return (error);
790 
791 
792 }
/*
 * raidclose -- close entry point.  Clears this partition's bit from
 * the appropriate open mask under the per-unit lock; on the last close
 * of an initialized unit, writes final (clean) component labels.
 * Always returns 0 once the lock has been obtained.
 */
793 /* ARGSUSED */
794 int
795 raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
796 {
797 	int     unit = raidunit(dev);
798 	struct raid_softc *rs;
799 	int     error = 0;
800 	int     part;
801 
802 	if (unit >= numraid)
803 		return (ENXIO);
804 	rs = &raid_softc[unit];
805 
806 	if ((error = raidlock(rs)) != 0)
807 		return (error);
808 
809 	part = DISKPART(dev);
810 
811 	/* ...that much closer to allowing unconfiguration... */
812 	switch (fmt) {
813 	case S_IFCHR:
814 		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
815 		break;
816 
817 	case S_IFBLK:
818 		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
819 		break;
820 	}
821 	rs->sc_dkdev.dk_openmask =
822 	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
823 
824 	if ((rs->sc_dkdev.dk_openmask == 0) &&
825 	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
826 		/* Last one... device is not unconfigured yet.
827 		   Device shutdown has taken care of setting the
828 		   clean bits if RAIDF_INITED is not set
829 		   mark things as clean... */
830 
831 		rf_update_component_labels(raidPtrs[unit],
832 						 RF_FINAL_COMPONENT_UPDATE);
833 
834 		/* If the kernel is shutting down, it will detach
835 		 * this RAID set soon enough.
836 		 */
837 	}
838 
839 	raidunlock(rs);
840 	return (0);
841 
842 }
843 
844 void
845 raidstrategy(struct buf *bp)
846 {
847 	unsigned int raidID = raidunit(bp->b_dev);
848 	RF_Raid_t *raidPtr;
849 	struct raid_softc *rs = &raid_softc[raidID];
850 	int     wlabel;
851 
852 	if ((rs->sc_flags & RAIDF_INITED) ==0) {
853 		bp->b_error = ENXIO;
854 		goto done;
855 	}
856 	if (raidID >= numraid || !raidPtrs[raidID]) {
857 		bp->b_error = ENODEV;
858 		goto done;
859 	}
860 	raidPtr = raidPtrs[raidID];
861 	if (!raidPtr->valid) {
862 		bp->b_error = ENODEV;
863 		goto done;
864 	}
865 	if (bp->b_bcount == 0) {
866 		db1_printf(("b_bcount is zero..\n"));
867 		goto done;
868 	}
869 
870 	/*
871 	 * Do bounds checking and adjust transfer.  If there's an
872 	 * error, the bounds check will flag that for us.
873 	 */
874 
875 	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
876 	if (DISKPART(bp->b_dev) == RAW_PART) {
877 		uint64_t size; /* device size in DEV_BSIZE unit */
878 
879 		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
880 			size = raidPtr->totalSectors <<
881 			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
882 		} else {
883 			size = raidPtr->totalSectors >>
884 			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
885 		}
886 		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
887 			goto done;
888 		}
889 	} else {
890 		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
891 			db1_printf(("Bounds check failed!!:%d %d\n",
892 				(int) bp->b_blkno, (int) wlabel));
893 			goto done;
894 		}
895 	}
896 
897 	rf_lock_mutex2(raidPtr->iodone_lock);
898 
899 	bp->b_resid = 0;
900 
901 	/* stuff it onto our queue */
902 	bufq_put(rs->buf_queue, bp);
903 
904 	/* scheduled the IO to happen at the next convenient time */
905 	rf_signal_cond2(raidPtr->iodone_cv);
906 	rf_unlock_mutex2(raidPtr->iodone_lock);
907 
908 	return;
909 
910 done:
911 	bp->b_resid = bp->b_bcount;
912 	biodone(bp);
913 }
914 /* ARGSUSED */
915 int
916 raidread(dev_t dev, struct uio *uio, int flags)
917 {
918 	int     unit = raidunit(dev);
919 	struct raid_softc *rs;
920 
921 	if (unit >= numraid)
922 		return (ENXIO);
923 	rs = &raid_softc[unit];
924 
925 	if ((rs->sc_flags & RAIDF_INITED) == 0)
926 		return (ENXIO);
927 
928 	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
929 
930 }
931 /* ARGSUSED */
932 int
933 raidwrite(dev_t dev, struct uio *uio, int flags)
934 {
935 	int     unit = raidunit(dev);
936 	struct raid_softc *rs;
937 
938 	if (unit >= numraid)
939 		return (ENXIO);
940 	rs = &raid_softc[unit];
941 
942 	if ((rs->sc_flags & RAIDF_INITED) == 0)
943 		return (ENXIO);
944 
945 	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
946 
947 }
948 
/*
 * raid_detach_unlocked -- tear down a RAID unit.  Caller must already
 * hold the per-unit lock (hence "unlocked" = lock not taken here).
 * Refuses with EBUSY while any partition is open; otherwise shuts the
 * RAIDframe core down (if initialized) and detaches/destroys the
 * generic disk.  Returns 0 on success or an errno.
 */
949 static int
950 raid_detach_unlocked(struct raid_softc *rs)
951 {
952 	int error;
953 	RF_Raid_t *raidPtr;
954 
955 	raidPtr = raidPtrs[device_unit(rs->sc_dev)];
956 
957 	/*
958 	 * If somebody has a partition mounted, we shouldn't
959 	 * shutdown.
960 	 */
961 	if (rs->sc_dkdev.dk_openmask != 0)
962 		return EBUSY;
963 
964 	if ((rs->sc_flags & RAIDF_INITED) == 0)
965 		;	/* not initialized: nothing to do */
966 	else if ((error = rf_Shutdown(raidPtr)) != 0)
967 		return error;
968 	else
969 		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);
970 
971 	/* Detach the disk. */
972 	dkwedge_delall(&rs->sc_dkdev);
973 	disk_detach(&rs->sc_dkdev);
974 	disk_destroy(&rs->sc_dkdev);
975 
976 	aprint_normal_dev(rs->sc_dev, "detached\n");
977 
978 	return 0;
979 }
980 
/*
 * raidioctl: ioctl entry point for the RAIDframe pseudo-device.
 *
 * Handles both the RAIDframe-specific RAIDFRAME_* commands (configure,
 * shutdown, parity/reconstruction control, component-label access,
 * status queries) and, at the bottom, the standard disk(4) DIOC*
 * commands (disklabels, wedges, bufq strategy).
 *
 * dev   - raid device/partition the ioctl was issued on
 * cmd   - ioctl command code
 * data  - in/out argument buffer (already copied in by sys_ioctl for
 *         by-value arguments; commands taking user pointers do their
 *         own copyin/copyout)
 * flag  - open flags of the descriptor (FWRITE checked for label writes)
 * l     - calling lwp
 *
 * Returns 0 on success or an errno value.
 */
int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int     unit = raidunit(dev);
	int     error = 0;
	int     part, pmask, s;
	cfdata_t cf;
	struct raid_softc *rs;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, **ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
/*	int raidid; */
	struct rf_recon_req *rrcopy, *rr;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_ComponentLabel_t **clabel_ptr;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t component;
	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
	int i, j, d;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif
	struct dkwedge_info *dkw;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];
	raidPtr = raidPtrs[unit];

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
		(int) DISKPART(dev), (int) unit, cmd));

	/* Must be open for writes for these commands... */
	switch (cmd) {
#ifdef DIOCGSECTORSIZE
	/* These two are read-only queries and are answered immediately,
	 * before the FWRITE / RAIDF_INITED gatekeeping below. */
	case DIOCGSECTORSIZE:
		*(u_int *)data = raidPtr->bytesPerSector;
		return 0;
	case DIOCGMEDIASIZE:
		*(off_t *)data =
		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
		return 0;
#endif
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	case DIOCWLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCSSTRATEGY:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCWDINFO:
	case ODIOCSDINFO:
	case ODIOCGDEFLABEL:
#endif
	case DIOCGPART:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCLWEDGES:
	case DIOCCACHESYNC:
	case RAIDFRAME_SHUTDOWN:
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
	case DIOCGSTRATEGY:
	case DIOCSSTRATEGY:
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
#ifdef COMPAT_50
	case RAIDFRAME_GET_INFO50:
		return rf_get_info50(raidPtr, data);

	case RAIDFRAME_CONFIGURE50:
		/* rf_config50 converts the old-format config into a fresh
		 * k_cfg, then we join the common configure path below. */
		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
			return retcode;
		goto config;
#endif
		/* configure the system */
	case RAIDFRAME_CONFIGURE:

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n",unit);
			return(EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

		u_cfg = *((RF_Config_t **) data);
		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
				retcode));
			return (retcode);
		}
		goto config;
		/* Common configure path; entered with a kernel copy of the
		 * config in k_cfg (from RAIDFRAME_CONFIGURE or, under
		 * COMPAT_50, RAIDFRAME_CONFIGURE50).  k_cfg is always freed
		 * before returning. */
	config:
		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				return (EINVAL);
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				return (ENOMEM);
			}
			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
					k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
					retcode));
				return (retcode);
			}
		} else
			specific_buf = NULL;
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 *  there is no stale data left in the case of a
		 *  reconfiguration
		 */
		memset(raidPtr, 0, sizeof(*raidPtr));
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(raidPtr);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((error = raidlock(rs)) != 0)
			return (error);

		/* Refuse if any partition other than the one the ioctl came
		 * in on is open, or if this partition is open both block-
		 * and character-wise.  Otherwise drop our own open bits so
		 * the detach below sees a fully closed disk. */
		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
			(rs->sc_dkdev.dk_copenmask & pmask)))
			retcode = EBUSY;
		else {
			rs->sc_flags |= RAIDF_SHUTDOWN;
			rs->sc_dkdev.dk_copenmask &= ~pmask;
			rs->sc_dkdev.dk_bopenmask &= ~pmask;
			rs->sc_dkdev.dk_openmask &= ~pmask;
			retcode = 0;
		}

		raidunlock(rs);

		if (retcode != 0)
			return retcode;

		/* free the pseudo device attach bits */

		cf = device_cfdata(rs->sc_dev);
		/* config_detach calls back into the driver's detach routine,
		 * which performs the actual rf_Shutdown. */
		if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
			free(cf, M_RAIDFRAME);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		clabel_ptr = (RF_ComponentLabel_t **) data;
		/* need to read the component label for the disk indicated
		   by row,column in clabel */

		/*
		 * Perhaps there should be an option to skip the in-core
		 * copy and hit the disk, as with disklabel(8).
		 */
		/* NOTE(review): the RF_Malloc result is dereferenced by the
		 * copyin below without a NULL check, unlike the other
		 * RF_Malloc sites in this function — confirm RF_Malloc
		 * cannot fail here, or add a check. */
		RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));

		/* The user's label is copied in only to learn which column
		 * is being asked about; the buffer is then freed and clabel
		 * is re-pointed at the in-core label for that column. */
		retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));

		if (retcode) {
			RF_Free(clabel, sizeof(*clabel));
			return retcode;
		}

		clabel->row = 0; /* Don't allow looking at anything else.*/

		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol +
		    raidPtr->numSpare)) {
			RF_Free(clabel, sizeof(*clabel));
			return EINVAL;
		}

		RF_Free(clabel, sizeof(*clabel));

		clabel = raidget_component_label(raidPtr, column);

		return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));

#if 0
	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		raidid = raidPtr->raidid;
#ifdef DEBUG
		printf("raid%d: Got component label:\n", raidid);
		printf("raid%d: Version: %d\n", raidid, clabel->version);
		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
		printf("raid%d: Column: %d\n", raidid, clabel->column);
		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
		printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif
		clabel->row = 0;
		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
		memcpy(raidget_component_label(raidPtr, column),
		    clabel, sizeof(*clabel));
		raidflush_component_label(raidPtr, column);
		return (0);
#endif

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		   */

		raidPtr->serial_number = clabel->serial_number;

		/* Write a fresh component label to every non-dead column. */
		for(column=0;column<raidPtr->numCol;column++) {
			diskPtr = &raidPtr->Disks[column];
			if (!RF_DEAD_DISK(diskPtr->status)) {
				ci_label = raidget_component_label(raidPtr,
				    column);
				/* Zeroing this is important. */
				memset(ci_label, 0, sizeof(*ci_label));
				raid_init_component_label(raidPtr, ci_label);
				ci_label->serial_number =
				    raidPtr->serial_number;
				ci_label->row = 0; /* we dont' pretend to support more */
				rf_component_label_set_partitionsize(ci_label,
				    diskPtr->partitionSize);
				ci_label->column = column;
				raidflush_component_label(raidPtr, column);
			}
			/* XXXjld what about the spares? */
		}

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return(0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return(EINVAL);
		}

		/* The rewrite runs asynchronously in its own kthread; the
		 * ioctl returns as soon as the thread is created. */
		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
					   rf_RewriteParityThread,
					   raidPtr,"raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		/* NOTE(review): not implemented — returns retcode, which is
		 * still 0 here, so this silently succeeds without doing
		 * anything. */
		return(retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return(EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		component.row = 0; /* we don't support any more */
		column = component.column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* Sanity-check the target component's state under the raid
		 * mutex before kicking off the reconstruction thread. */
		rf_lock_mutex2(raidPtr->mutex);
		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant!!! */
			/* some component other than this has failed.
			   Let's not make things worse than they already
			   are... */
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:     Col: %d   Too many failures.\n",
			       raidPtr->raidid, column);
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status ==
		    rf_ds_reconstructing) {
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);

			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status == rf_ds_spared) {
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* rrcopy is handed to (and freed by) the recon thread. */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);

		rrcopy->raidPtr = (void *) raidPtr;
		rrcopy->col = column;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconstructInPlaceThread,
					   rrcopy,"raid_reconip");
		return(retcode);

	case RAIDFRAME_GET_INFO:
		if (!raidPtr->valid)
			return (ENODEV);
		ucfgp = (RF_DeviceConfig_t **) data;
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
			  (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		d_cfg->rows = 1; /* there is only 1 row now */
		d_cfg->cols = raidPtr->numCol;
		d_cfg->ndevs = raidPtr->numCol;
		if (d_cfg->ndevs >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->nspares = raidPtr->numSpare;
		if (d_cfg->nspares >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
		d = 0;
		for (j = 0; j < d_cfg->cols; j++) {
			d_cfg->devs[d] = raidPtr->Disks[j];
			d++;
		}
		/* Spares live in Disks[] immediately after the data
		 * columns. */
		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
			d_cfg->spares[i] = raidPtr->Disks[j];
		}
		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map,
		    (struct rf_pmstat *)data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
			(struct rf_pmparams *)data, 1))
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return (0);

		/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		rr = (struct rf_recon_req *) data;
		rr->row = 0;
		if (rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);


		rf_lock_mutex2(raidPtr->mutex);
		if (raidPtr->status == rf_rs_reconstructing) {
			/* you can't fail a disk while we're reconstructing! */
			/* XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if ((raidPtr->Disks[rr->col].status ==
		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
			/* some other component has failed.  Let's not make
			   things worse. XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
			/* Can't fail a spared disk! */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);
		memcpy(rrcopy, rr, sizeof(*rr));
		rrcopy->raidPtr = (void *) raidPtr;

		/* NOTE(review): unlike the other thread-spawning cases, the
		 * RF_CREATE_THREAD result is assigned to retcode but 0 is
		 * returned unconditionally — confirm whether intentional. */
		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconThread,
					   rrcopy,"raid_recon");
		return (0);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
					   rf_CopybackThread,
					   raidPtr,"raid_copyback");
		return (retcode);

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->status != rf_rs_reconstructing)
			*(int *) data = 100;
		else {
			if (raidPtr->reconControl->numRUsTotal > 0) {
				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
			} else {
				*(int *) data = 0;
			}
		}
		return (0);
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->status != rf_rs_reconstructing) {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		} else {
			progressInfo.total =
				raidPtr->reconControl->numRUsTotal;
			progressInfo.completed =
				raidPtr->reconControl->numRUsComplete;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
				raidPtr->parity_rewrite_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->parity_rewrite_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->parity_rewrite_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->copyback_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->copyback_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table     -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the dameon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		/* NOTE(review): "rf_broadcast_conf2" looks like a typo for
		 * rf_broadcast_cond2 (cf. the SEND_SPARET case below); this
		 * is dead #if 0 code, so it never gets compiled. */
		rf_broadcast_conf2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_resp_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return (error);

	switch (cmd) {
	case DIOCGDINFO:
		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
		break;
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
		newlabel = *(rs->sc_dkdev.dk_label);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGPART:
		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
		((struct partinfo *) data)->part =
		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;
#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		if ((error = raidlock(rs)) != 0)
			return (error);

		rs->sc_flags |= RAIDF_LABELLING;

		error = setdisklabel(rs->sc_dkdev.dk_label,
		    lp, 0, rs->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			/* Only the *W*DINFO variants write the label out to
			 * the (virtual) disk; SDINFO updates in-core only. */
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(RAIDLABELDEV(dev),
				    raidstrategy, rs->sc_dkdev.dk_label,
				    rs->sc_dkdev.dk_cpulabel);
		}
		rs->sc_flags &= ~RAIDF_LABELLING;

		raidunlock(rs);

		if (error)
			return (error);
		break;
	}

	case DIOCWLABEL:
		if (*(int *) data != 0)
			rs->sc_flags |= RAIDF_WLABEL;
		else
			rs->sc_flags &= ~RAIDF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCAWEDGE:
	case DIOCDWEDGE:
	    	dkw = (void *)data;

		/* If the ioctl happens here, the parent is us. */
		(void)strcpy(dkw->dkw_parent, rs->sc_xname);
		return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);

	case DIOCLWEDGES:
		return dkwedge_list(&rs->sc_dkdev,
		    (struct dkwedge_list *)data, l);
	case DIOCCACHESYNC:
		return rf_sync_component_caches(raidPtr);

	case DIOCGSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;

		/* splbio protects rs->buf_queue against interrupt-time
		 * strategy calls while we read its name. */
		s = splbio();
		strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
		    sizeof(dks->dks_name));
		splx(s);
		dks->dks_paramlen = 0;

		return 0;
	    }

	case DIOCSSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;
		struct bufq_state *new;
		struct bufq_state *old;

		if (dks->dks_param != NULL) {
			return EINVAL;
		}
		dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
		error = bufq_alloc(&new, dks->dks_name,
		    BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
		if (error) {
			return error;
		}
		/* Swap in the new queue under splbio, migrating any pending
		 * buffers, then free the old queue outside the critical
		 * section. */
		s = splbio();
		old = rs->buf_queue;
		bufq_move(new, old);
		rs->buf_queue = new;
		splx(s);
		bufq_free(old);

		return 0;
	    }

	default:
		retcode = ENOTTY;
	}
	return (retcode);

}
1912 
1913 
1914 /* raidinit -- complete the rest of the initialization for the
1915    RAIDframe device.  */
1916 
1917 
/*
 * raidinit: complete kernel-side initialization of a freshly configured
 * RAID set: attach the pseudo-device, set up the disk(9) structures,
 * and kick off wedge discovery.  Called from the configure path after a
 * successful rf_Configure().
 */
static void
raidinit(RF_Raid_t *raidPtr)
{
	cfdata_t cf;
	struct raid_softc *rs;
	int     unit;

	unit = raidPtr->raidid;

	rs = &raid_softc[unit];

	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	/* XXX doesn't check bounds. */
	/* (NOTE(review): snprintf does bound the write to sc_xname; the
	 * XXX above presumably refers to possible truncation of the name,
	 * not an overflow.) */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	rs->sc_dev = config_attach_pseudo(cf);

	if (rs->sc_dev == NULL) {
		/* Attach failed: undo the INITED flag and release the
		 * cfdata we allocated above. */
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		rs->sc_flags &= ~RAIDF_INITED;
		free(cf, M_RAIDFRAME);
		return;
	}

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
	disk_attach(&rs->sc_dkdev);
	disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Scan the new device for GPT/wedge partitions. */
	dkwedge_discover(&rs->sc_dkdev);

	rf_set_properties(rs, raidPtr);

}
1970 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1971 /* wake up the daemon & tell it to get us a spare table
1972  * XXX
1973  * the entries in the queues should be tagged with the raidPtr
1974  * so that in the extremely rare case that two recons happen at once,
1975  * we know for which device were requesting a spare table
1976  * XXX
1977  *
1978  * XXX This code is not currently used. GO
1979  */
/*
 * rf_GetSpareTableFromDaemon: hand a spare-table request to the
 * userland sparetable daemon and block until the response arrives.
 *
 * The request is pushed onto rf_sparet_wait_queue (waking any daemon
 * blocked in RAIDFRAME_SPARET_WAIT), then we sleep on
 * rf_sparet_resp_cv until a response appears on rf_sparet_resp_queue.
 *
 * req - the request to enqueue; ownership passes to the daemon side.
 *       The response dequeued later is a *different* allocation, which
 *       we free here after extracting its status.
 *
 * Returns the daemon's status code (the response's fcol field).
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int     retcode;

	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* rf_wait_cond2 releases the mutex while asleep and reacquires
	 * it on wakeup, so re-testing the queue in a loop is required
	 * (spurious wakeups). */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
2003 #endif
2004 
2005 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2006  * bp & passes it down.
2007  * any calls originating in the kernel must use non-blocking I/O
2008  * do some extra sanity checking to return "appropriate" error values for
2009  * certain conditions (to make some standard utilities work)
2010  *
2011  * Formerly known as: rf_DoAccessKernel
2012  */
2013 void
2014 raidstart(RF_Raid_t *raidPtr)
2015 {
2016 	RF_SectorCount_t num_blocks, pb, sum;
2017 	RF_RaidAddr_t raid_addr;
2018 	struct partition *pp;
2019 	daddr_t blocknum;
2020 	int     unit;
2021 	struct raid_softc *rs;
2022 	int     do_async;
2023 	struct buf *bp;
2024 	int rc;
2025 
2026 	unit = raidPtr->raidid;
2027 	rs = &raid_softc[unit];
2028 
2029 	/* quick check to see if anything has died recently */
2030 	rf_lock_mutex2(raidPtr->mutex);
2031 	if (raidPtr->numNewFailures > 0) {
2032 		rf_unlock_mutex2(raidPtr->mutex);
2033 		rf_update_component_labels(raidPtr,
2034 					   RF_NORMAL_COMPONENT_UPDATE);
2035 		rf_lock_mutex2(raidPtr->mutex);
2036 		raidPtr->numNewFailures--;
2037 	}
2038 
2039 	/* Check to see if we're at the limit... */
2040 	while (raidPtr->openings > 0) {
2041 		rf_unlock_mutex2(raidPtr->mutex);
2042 
2043 		/* get the next item, if any, from the queue */
2044 		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
2045 			/* nothing more to do */
2046 			return;
2047 		}
2048 
2049 		/* Ok, for the bp we have here, bp->b_blkno is relative to the
2050 		 * partition.. Need to make it absolute to the underlying
2051 		 * device.. */
2052 
2053 		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
2054 		if (DISKPART(bp->b_dev) != RAW_PART) {
2055 			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
2056 			blocknum += pp->p_offset;
2057 		}
2058 
2059 		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
2060 			    (int) blocknum));
2061 
2062 		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
2063 		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
2064 
2065 		/* *THIS* is where we adjust what block we're going to...
2066 		 * but DO NOT TOUCH bp->b_blkno!!! */
2067 		raid_addr = blocknum;
2068 
2069 		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
2070 		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
2071 		sum = raid_addr + num_blocks + pb;
2072 		if (1 || rf_debugKernelAccess) {
2073 			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
2074 				    (int) raid_addr, (int) sum, (int) num_blocks,
2075 				    (int) pb, (int) bp->b_resid));
2076 		}
2077 		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
2078 		    || (sum < num_blocks) || (sum < pb)) {
2079 			bp->b_error = ENOSPC;
2080 			bp->b_resid = bp->b_bcount;
2081 			biodone(bp);
2082 			rf_lock_mutex2(raidPtr->mutex);
2083 			continue;
2084 		}
2085 		/*
2086 		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
2087 		 */
2088 
2089 		if (bp->b_bcount & raidPtr->sectorMask) {
2090 			bp->b_error = EINVAL;
2091 			bp->b_resid = bp->b_bcount;
2092 			biodone(bp);
2093 			rf_lock_mutex2(raidPtr->mutex);
2094 			continue;
2095 
2096 		}
2097 		db1_printf(("Calling DoAccess..\n"));
2098 
2099 
2100 		rf_lock_mutex2(raidPtr->mutex);
2101 		raidPtr->openings--;
2102 		rf_unlock_mutex2(raidPtr->mutex);
2103 
2104 		/*
2105 		 * Everything is async.
2106 		 */
2107 		do_async = 1;
2108 
2109 		disk_busy(&rs->sc_dkdev);
2110 
2111 		/* XXX we're still at splbio() here... do we *really*
2112 		   need to be? */
2113 
2114 		/* don't ever condition on bp->b_flags & B_WRITE.
2115 		 * always condition on B_READ instead */
2116 
2117 		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
2118 				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
2119 				 do_async, raid_addr, num_blocks,
2120 				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
2121 
2122 		if (rc) {
2123 			bp->b_error = rc;
2124 			bp->b_resid = bp->b_bcount;
2125 			biodone(bp);
2126 			/* continue loop */
2127 		}
2128 
2129 		rf_lock_mutex2(raidPtr->mutex);
2130 	}
2131 	rf_unlock_mutex2(raidPtr->mutex);
2132 }
2133 
2134 
2135 
2136 
2137 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
2138 
2139 int
2140 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
2141 {
2142 	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
2143 	struct buf *bp;
2144 
2145 	req->queue = queue;
2146 	bp = req->bp;
2147 
2148 	switch (req->type) {
2149 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
2150 		/* XXX need to do something extra here.. */
2151 		/* I'm leaving this in, as I've never actually seen it used,
2152 		 * and I'd like folks to report it... GO */
2153 		printf(("WAKEUP CALLED\n"));
2154 		queue->numOutstanding++;
2155 
2156 		bp->b_flags = 0;
2157 		bp->b_private = req;
2158 
2159 		KernelWakeupFunc(bp);
2160 		break;
2161 
2162 	case RF_IO_TYPE_READ:
2163 	case RF_IO_TYPE_WRITE:
2164 #if RF_ACC_TRACE > 0
2165 		if (req->tracerec) {
2166 			RF_ETIMER_START(req->tracerec->timer);
2167 		}
2168 #endif
2169 		InitBP(bp, queue->rf_cinfo->ci_vp,
2170 		    op, queue->rf_cinfo->ci_dev,
2171 		    req->sectorOffset, req->numSector,
2172 		    req->buf, KernelWakeupFunc, (void *) req,
2173 		    queue->raidPtr->logBytesPerSector, req->b_proc);
2174 
2175 		if (rf_debugKernelAccess) {
2176 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
2177 				(long) bp->b_blkno));
2178 		}
2179 		queue->numOutstanding++;
2180 		queue->last_deq_sector = req->sectorOffset;
2181 		/* acc wouldn't have been let in if there were any pending
2182 		 * reqs at any other priority */
2183 		queue->curPriority = req->priority;
2184 
2185 		db1_printf(("Going for %c to unit %d col %d\n",
2186 			    req->type, queue->raidPtr->raidid,
2187 			    queue->col));
2188 		db1_printf(("sector %d count %d (%d bytes) %d\n",
2189 			(int) req->sectorOffset, (int) req->numSector,
2190 			(int) (req->numSector <<
2191 			    queue->raidPtr->logBytesPerSector),
2192 			(int) queue->raidPtr->logBytesPerSector));
2193 
2194 		/*
2195 		 * XXX: drop lock here since this can block at
2196 		 * least with backing SCSI devices.  Retake it
2197 		 * to minimize fuss with calling interfaces.
2198 		 */
2199 
2200 		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
2201 		bdev_strategy(bp);
2202 		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
2203 		break;
2204 
2205 	default:
2206 		panic("bad req->type in rf_DispatchKernelIO");
2207 	}
2208 	db1_printf(("Exiting from DispatchKernelIO\n"));
2209 
2210 	return (0);
2211 }
2212 /* this is the callback function associated with a I/O invoked from
2213    kernel code.
2214  */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* the request was stashed in b_private by rf_DispatchKernelIO()
	   (either directly, or via InitBP()) */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	/* charge the elapsed time of this physical I/O to the trace
	   record started in rf_DispatchKernelIO() */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* numNewFailures is noticed by raidstart(), which
			   triggers a component-label update */
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2280 
2281 
2282 /*
2283  * initialize a buf structure for doing an I/O in the kernel.
2284  */
2285 static void
2286 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
2287        RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
2288        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2289        struct proc *b_proc)
2290 {
2291 	/* bp->b_flags       = B_PHYS | rw_flag; */
2292 	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
2293 	bp->b_oflags = 0;
2294 	bp->b_cflags = 0;
2295 	bp->b_bcount = numSect << logBytesPerSector;
2296 	bp->b_bufsize = bp->b_bcount;
2297 	bp->b_error = 0;
2298 	bp->b_dev = dev;
2299 	bp->b_data = bf;
2300 	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
2301 	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
2302 	if (bp->b_bcount == 0) {
2303 		panic("bp->b_bcount is zero in InitBP!!");
2304 	}
2305 	bp->b_proc = b_proc;
2306 	bp->b_iodone = cbFunc;
2307 	bp->b_private = cbArg;
2308 }
2309 
2310 static void
2311 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2312 		    struct disklabel *lp)
2313 {
2314 	memset(lp, 0, sizeof(*lp));
2315 
2316 	/* fabricate a label... */
2317 	lp->d_secperunit = raidPtr->totalSectors;
2318 	lp->d_secsize = raidPtr->bytesPerSector;
2319 	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2320 	lp->d_ntracks = 4 * raidPtr->numCol;
2321 	lp->d_ncylinders = raidPtr->totalSectors /
2322 		(lp->d_nsectors * lp->d_ntracks);
2323 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2324 
2325 	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2326 	lp->d_type = DTYPE_RAID;
2327 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2328 	lp->d_rpm = 3600;
2329 	lp->d_interleave = 1;
2330 	lp->d_flags = 0;
2331 
2332 	lp->d_partitions[RAW_PART].p_offset = 0;
2333 	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2334 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2335 	lp->d_npartitions = RAW_PART + 1;
2336 
2337 	lp->d_magic = DISKMAGIC;
2338 	lp->d_magic2 = DISKMAGIC;
2339 	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2340 
2341 }
2342 /*
2343  * Read the disklabel from the raid device.  If one is not present, fake one
2344  * up.
2345  */
static void
raidgetdisklabel(dev_t dev)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char   *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a fabricated default in case the read fails */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* no on-disk label: synthesize one */
		raidmakedisklabel(rs);
	else {
		int     i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%" PRIu32 ") != "
			    "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
			    lp->d_secperunit, rs->sc_size);
		/* warn about (but do not reject) partitions that extend
		   past the end of the set */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				       "exceeds the size of raid (%" PRIu64 ")\n",
				       unit, rs->sc_xname, 'a' + i, rs->sc_size);
		}
	}

}
2398 /*
2399  * Take care of things one might want to take care of in the event
2400  * that a disklabel isn't present.
2401  */
2402 static void
2403 raidmakedisklabel(struct raid_softc *rs)
2404 {
2405 	struct disklabel *lp = rs->sc_dkdev.dk_label;
2406 	db1_printf(("Making a label..\n"));
2407 
2408 	/*
2409 	 * For historical reasons, if there's no disklabel present
2410 	 * the raw partition must be marked FS_BSDFFS.
2411 	 */
2412 
2413 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2414 
2415 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2416 
2417 	lp->d_checksum = dkcksum(lp);
2418 }
2419 /*
2420  * Wait interruptibly for an exclusive lock.
2421  *
2422  * XXX
2423  * Several drivers do this; it should be abstracted and made MP-safe.
2424  * (Hmm... where have we seen this warning before :->  GO )
2425  */
2426 static int
2427 raidlock(struct raid_softc *rs)
2428 {
2429 	int     error;
2430 
2431 	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2432 		rs->sc_flags |= RAIDF_WANTED;
2433 		if ((error =
2434 			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2435 			return (error);
2436 	}
2437 	rs->sc_flags |= RAIDF_LOCKED;
2438 	return (0);
2439 }
2440 /*
2441  * Unlock and wake up any waiters.
2442  */
2443 static void
2444 raidunlock(struct raid_softc *rs)
2445 {
2446 
2447 	rs->sc_flags &= ~RAIDF_LOCKED;
2448 	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2449 		rs->sc_flags &= ~RAIDF_WANTED;
2450 		wakeup(rs);
2451 	}
2452 }
2453 
2454 
2455 #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
2456 #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
2457 #define RF_PARITY_MAP_SIZE   RF_PARITYMAP_NBYTE
2458 
2459 static daddr_t
2460 rf_component_info_offset(void)
2461 {
2462 
2463 	return RF_COMPONENT_INFO_OFFSET;
2464 }
2465 
2466 static daddr_t
2467 rf_component_info_size(unsigned secsize)
2468 {
2469 	daddr_t info_size;
2470 
2471 	KASSERT(secsize);
2472 	if (secsize > RF_COMPONENT_INFO_SIZE)
2473 		info_size = secsize;
2474 	else
2475 		info_size = RF_COMPONENT_INFO_SIZE;
2476 
2477 	return info_size;
2478 }
2479 
2480 static daddr_t
2481 rf_parity_map_offset(RF_Raid_t *raidPtr)
2482 {
2483 	daddr_t map_offset;
2484 
2485 	KASSERT(raidPtr->bytesPerSector);
2486 	if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2487 		map_offset = raidPtr->bytesPerSector;
2488 	else
2489 		map_offset = RF_COMPONENT_INFO_SIZE;
2490 	map_offset += rf_component_info_offset();
2491 
2492 	return map_offset;
2493 }
2494 
2495 static daddr_t
2496 rf_parity_map_size(RF_Raid_t *raidPtr)
2497 {
2498 	daddr_t map_size;
2499 
2500 	if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2501 		map_size = raidPtr->bytesPerSector;
2502 	else
2503 		map_size = RF_PARITY_MAP_SIZE;
2504 
2505 	return map_size;
2506 }
2507 
2508 int
2509 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
2510 {
2511 	RF_ComponentLabel_t *clabel;
2512 
2513 	clabel = raidget_component_label(raidPtr, col);
2514 	clabel->clean = RF_RAID_CLEAN;
2515 	raidflush_component_label(raidPtr, col);
2516 	return(0);
2517 }
2518 
2519 
2520 int
2521 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
2522 {
2523 	RF_ComponentLabel_t *clabel;
2524 
2525 	clabel = raidget_component_label(raidPtr, col);
2526 	clabel->clean = RF_RAID_DIRTY;
2527 	raidflush_component_label(raidPtr, col);
2528 	return(0);
2529 }
2530 
/*
 * Read the component label for column `col' from disk into the in-core
 * copy kept in raidPtr->raid_cinfo[col].ci_label.  Returns 0 on
 * success, an errno value otherwise.
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2540 
/* Return a pointer to the in-core component label for column `col'. */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2546 
2547 int
2548 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2549 {
2550 	RF_ComponentLabel_t *label;
2551 
2552 	label = &raidPtr->raid_cinfo[col].ci_label;
2553 	label->mod_counter = raidPtr->mod_counter;
2554 #ifndef RF_NO_PARITY_MAP
2555 	label->parity_map_modcount = label->mod_counter;
2556 #endif
2557 	return raidwrite_component_label(raidPtr->bytesPerSector,
2558 	    raidPtr->Disks[col].dev,
2559 	    raidPtr->raid_cinfo[col].ci_vp, label);
2560 }
2561 
2562 
/*
 * Read a component label from the fixed label area of a component.
 * `secsize' is the component's sector size; the on-disk area read is
 * sized by rf_component_info_size().  Returns 0 or an errno value.
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2572 
2573 /* ARGSUSED */
2574 static int
2575 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2576     size_t msize, daddr_t offset, daddr_t dsize)
2577 {
2578 	struct buf *bp;
2579 	const struct bdevsw *bdev;
2580 	int error;
2581 
2582 	/* XXX should probably ensure that we don't try to do this if
2583 	   someone has changed rf_protected_sectors. */
2584 
2585 	if (b_vp == NULL) {
2586 		/* For whatever reason, this component is not valid.
2587 		   Don't try to read a component label from it. */
2588 		return(EINVAL);
2589 	}
2590 
2591 	/* get a block of the appropriate size... */
2592 	bp = geteblk((int)dsize);
2593 	bp->b_dev = dev;
2594 
2595 	/* get our ducks in a row for the read */
2596 	bp->b_blkno = offset / DEV_BSIZE;
2597 	bp->b_bcount = dsize;
2598 	bp->b_flags |= B_READ;
2599  	bp->b_resid = dsize;
2600 
2601 	bdev = bdevsw_lookup(bp->b_dev);
2602 	if (bdev == NULL)
2603 		return (ENXIO);
2604 	(*bdev->d_strategy)(bp);
2605 
2606 	error = biowait(bp);
2607 
2608 	if (!error) {
2609 		memcpy(data, bp->b_data, msize);
2610 	}
2611 
2612 	brelse(bp, 0);
2613 	return(error);
2614 }
2615 
2616 
/*
 * Write a component label to the fixed label area of a component.
 * The write is synchronous (final argument 0 = !async).  Returns 0 or
 * an errno value.
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2626 
2627 /* ARGSUSED */
2628 static int
2629 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2630     size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
2631 {
2632 	struct buf *bp;
2633 	const struct bdevsw *bdev;
2634 	int error;
2635 
2636 	/* get a block of the appropriate size... */
2637 	bp = geteblk((int)dsize);
2638 	bp->b_dev = dev;
2639 
2640 	/* get our ducks in a row for the write */
2641 	bp->b_blkno = offset / DEV_BSIZE;
2642 	bp->b_bcount = dsize;
2643 	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2644  	bp->b_resid = dsize;
2645 
2646 	memset(bp->b_data, 0, dsize);
2647 	memcpy(bp->b_data, data, msize);
2648 
2649 	bdev = bdevsw_lookup(bp->b_dev);
2650 	if (bdev == NULL)
2651 		return (ENXIO);
2652 	(*bdev->d_strategy)(bp);
2653 	if (asyncp)
2654 		return 0;
2655 	error = biowait(bp);
2656 	brelse(bp, 0);
2657 	if (error) {
2658 #if 1
2659 		printf("Failed to write RAID component info!\n");
2660 #endif
2661 	}
2662 
2663 	return(error);
2664 }
2665 
2666 void
2667 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2668 {
2669 	int c;
2670 
2671 	for (c = 0; c < raidPtr->numCol; c++) {
2672 		/* Skip dead disks. */
2673 		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2674 			continue;
2675 		/* XXXjld: what if an error occurs here? */
2676 		raidwrite_component_area(raidPtr->Disks[c].dev,
2677 		    raidPtr->raid_cinfo[c].ci_vp, map,
2678 		    RF_PARITYMAP_NBYTE,
2679 		    rf_parity_map_offset(raidPtr),
2680 		    rf_parity_map_size(raidPtr), 0);
2681 	}
2682 }
2683 
2684 void
2685 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2686 {
2687 	struct rf_paritymap_ondisk tmp;
2688 	int c,first;
2689 
2690 	first=1;
2691 	for (c = 0; c < raidPtr->numCol; c++) {
2692 		/* Skip dead disks. */
2693 		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2694 			continue;
2695 		raidread_component_area(raidPtr->Disks[c].dev,
2696 		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
2697 		    RF_PARITYMAP_NBYTE,
2698 		    rf_parity_map_offset(raidPtr),
2699 		    rf_parity_map_size(raidPtr));
2700 		if (first) {
2701 			memcpy(map, &tmp, sizeof(*map));
2702 			first = 0;
2703 		} else {
2704 			rf_paritymap_merge(map, &tmp);
2705 		}
2706 	}
2707 }
2708 
/*
 * Bump the mod counter and mark the component label of every live
 * component (and every in-use spare) dirty on disk.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	/* now handle the spares that are actually in use */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* NOTE(review): if no column references this spare,
			   scol stays -1 and is recorded below -- confirm
			   that cannot happen for rf_ds_used_spare */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2768 
2769 
/*
 * Rewrite the component labels of all optimal components and in-use
 * spares with a freshly bumped mod counter.  If `final' is
 * RF_FINAL_COMPONENT_UPDATE and parity is known good, also mark the
 * labels clean (a second flush via raidmarkclean()).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2844 
2845 void
2846 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2847 {
2848 
2849 	if (vp != NULL) {
2850 		if (auto_configured == 1) {
2851 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2852 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2853 			vput(vp);
2854 
2855 		} else {
2856 			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
2857 		}
2858 	}
2859 }
2860 
2861 
2862 void
2863 rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2864 {
2865 	int r,c;
2866 	struct vnode *vp;
2867 	int acd;
2868 
2869 
2870 	/* We take this opportunity to close the vnodes like we should.. */
2871 
2872 	for (c = 0; c < raidPtr->numCol; c++) {
2873 		vp = raidPtr->raid_cinfo[c].ci_vp;
2874 		acd = raidPtr->Disks[c].auto_configured;
2875 		rf_close_component(raidPtr, vp, acd);
2876 		raidPtr->raid_cinfo[c].ci_vp = NULL;
2877 		raidPtr->Disks[c].auto_configured = 0;
2878 	}
2879 
2880 	for (r = 0; r < raidPtr->numSpare; r++) {
2881 		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2882 		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
2883 		rf_close_component(raidPtr, vp, acd);
2884 		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2885 		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
2886 	}
2887 }
2888 
2889 
2890 void
2891 rf_ReconThread(struct rf_recon_req *req)
2892 {
2893 	int     s;
2894 	RF_Raid_t *raidPtr;
2895 
2896 	s = splbio();
2897 	raidPtr = (RF_Raid_t *) req->raidPtr;
2898 	raidPtr->recon_in_progress = 1;
2899 
2900 	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
2901 		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2902 
2903 	RF_Free(req, sizeof(*req));
2904 
2905 	raidPtr->recon_in_progress = 0;
2906 	splx(s);
2907 
2908 	/* That's all... */
2909 	kthread_exit(0);	/* does not return */
2910 }
2911 
2912 void
2913 rf_RewriteParityThread(RF_Raid_t *raidPtr)
2914 {
2915 	int retcode;
2916 	int s;
2917 
2918 	raidPtr->parity_rewrite_stripes_done = 0;
2919 	raidPtr->parity_rewrite_in_progress = 1;
2920 	s = splbio();
2921 	retcode = rf_RewriteParity(raidPtr);
2922 	splx(s);
2923 	if (retcode) {
2924 		printf("raid%d: Error re-writing parity (%d)!\n",
2925 		    raidPtr->raidid, retcode);
2926 	} else {
2927 		/* set the clean bit!  If we shutdown correctly,
2928 		   the clean bit on each component label will get
2929 		   set */
2930 		raidPtr->parity_good = RF_RAID_CLEAN;
2931 	}
2932 	raidPtr->parity_rewrite_in_progress = 0;
2933 
2934 	/* Anyone waiting for us to stop?  If so, inform them... */
2935 	if (raidPtr->waitShutdown) {
2936 		wakeup(&raidPtr->parity_rewrite_in_progress);
2937 	}
2938 
2939 	/* That's all... */
2940 	kthread_exit(0);	/* does not return */
2941 }
2942 
2943 
2944 void
2945 rf_CopybackThread(RF_Raid_t *raidPtr)
2946 {
2947 	int s;
2948 
2949 	raidPtr->copyback_in_progress = 1;
2950 	s = splbio();
2951 	rf_CopybackReconstructedData(raidPtr);
2952 	splx(s);
2953 	raidPtr->copyback_in_progress = 0;
2954 
2955 	/* That's all... */
2956 	kthread_exit(0);	/* does not return */
2957 }
2958 
2959 
2960 void
2961 rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2962 {
2963 	int s;
2964 	RF_Raid_t *raidPtr;
2965 
2966 	s = splbio();
2967 	raidPtr = req->raidPtr;
2968 	raidPtr->recon_in_progress = 1;
2969 	rf_ReconstructInPlace(raidPtr, req->col);
2970 	RF_Free(req, sizeof(*req));
2971 	raidPtr->recon_in_progress = 0;
2972 	splx(s);
2973 
2974 	/* That's all... */
2975 	kthread_exit(0);	/* does not return */
2976 }
2977 
/*
 * Try to read a component label from (dev, vp).  If the label looks
 * reasonable, prepend a new RF_AutoConfig_t describing the component
 * to `ac_list' and keep the vnode open; otherwise close the vnode.
 * Returns the (possibly extended) list, or NULL if an allocation
 * failure forced the whole list to be torn down.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		    /* out of memory: free the entire list built so far
		       (each entry plus its label) and give up */
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			/* the new entry takes ownership of clabel and vp */
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: no entry was created, so release both the
		   label and the vnode here */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
3035 
3036 RF_AutoConfig_t *
3037 rf_find_raid_components(void)
3038 {
3039 	struct vnode *vp;
3040 	struct disklabel label;
3041 	device_t dv;
3042 	deviter_t di;
3043 	dev_t dev;
3044 	int bmajor, bminor, wedge, rf_part_found;
3045 	int error;
3046 	int i;
3047 	RF_AutoConfig_t *ac_list;
3048 	uint64_t numsecs;
3049 	unsigned secsize;
3050 
3051 	/* initialize the AutoConfig list */
3052 	ac_list = NULL;
3053 
3054 	/* we begin by trolling through *all* the devices on the system */
3055 
3056 	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
3057 	     dv = deviter_next(&di)) {
3058 
3059 		/* we are only interested in disks... */
3060 		if (device_class(dv) != DV_DISK)
3061 			continue;
3062 
3063 		/* we don't care about floppies... */
3064 		if (device_is_a(dv, "fd")) {
3065 			continue;
3066 		}
3067 
3068 		/* we don't care about CD's... */
3069 		if (device_is_a(dv, "cd")) {
3070 			continue;
3071 		}
3072 
3073 		/* we don't care about md's... */
3074 		if (device_is_a(dv, "md")) {
3075 			continue;
3076 		}
3077 
3078 		/* hdfd is the Atari/Hades floppy driver */
3079 		if (device_is_a(dv, "hdfd")) {
3080 			continue;
3081 		}
3082 
3083 		/* fdisa is the Atari/Milan floppy driver */
3084 		if (device_is_a(dv, "fdisa")) {
3085 			continue;
3086 		}
3087 
3088 		/* need to find the device_name_to_block_device_major stuff */
3089 		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
3090 
3091 		rf_part_found = 0; /*No raid partition as yet*/
3092 
3093 		/* get a vnode for the raw partition of this disk */
3094 
3095 		wedge = device_is_a(dv, "dk");
3096 		bminor = minor(device_unit(dv));
3097 		dev = wedge ? makedev(bmajor, bminor) :
3098 		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
3099 		if (bdevvp(dev, &vp))
3100 			panic("RAID can't alloc vnode");
3101 
3102 		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
3103 
3104 		if (error) {
3105 			/* "Who cares."  Continue looking
3106 			   for something that exists*/
3107 			vput(vp);
3108 			continue;
3109 		}
3110 
3111 		error = getdisksize(vp, &numsecs, &secsize);
3112 		if (error) {
3113 			vput(vp);
3114 			continue;
3115 		}
3116 		if (wedge) {
3117 			struct dkwedge_info dkw;
3118 			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
3119 			    NOCRED);
3120 			if (error) {
3121 				printf("RAIDframe: can't get wedge info for "
3122 				    "dev %s (%d)\n", device_xname(dv), error);
3123 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3124 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3125 				vput(vp);
3126 				continue;
3127 			}
3128 
3129 			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
3130 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3131 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3132 				vput(vp);
3133 				continue;
3134 			}
3135 
3136 			ac_list = rf_get_component(ac_list, dev, vp,
3137 			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
3138 			rf_part_found = 1; /*There is a raid component on this disk*/
3139 			continue;
3140 		}
3141 
3142 		/* Ok, the disk exists.  Go get the disklabel. */
3143 		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
3144 		if (error) {
3145 			/*
3146 			 * XXX can't happen - open() would
3147 			 * have errored out (or faked up one)
3148 			 */
3149 			if (error != ENOTTY)
3150 				printf("RAIDframe: can't get label for dev "
3151 				    "%s (%d)\n", device_xname(dv), error);
3152 		}
3153 
3154 		/* don't need this any more.  We'll allocate it again
3155 		   a little later if we really do... */
3156 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3157 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3158 		vput(vp);
3159 
3160 		if (error)
3161 			continue;
3162 
3163 		rf_part_found = 0; /*No raid partitions yet*/
3164 		for (i = 0; i < label.d_npartitions; i++) {
3165 			char cname[sizeof(ac_list->devname)];
3166 
3167 			/* We only support partitions marked as RAID */
3168 			if (label.d_partitions[i].p_fstype != FS_RAID)
3169 				continue;
3170 
3171 			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
3172 			if (bdevvp(dev, &vp))
3173 				panic("RAID can't alloc vnode");
3174 
3175 			error = VOP_OPEN(vp, FREAD, NOCRED);
3176 			if (error) {
3177 				/* Whatever... */
3178 				vput(vp);
3179 				continue;
3180 			}
3181 			snprintf(cname, sizeof(cname), "%s%c",
3182 			    device_xname(dv), 'a' + i);
3183 			ac_list = rf_get_component(ac_list, dev, vp, cname,
3184 				label.d_partitions[i].p_size, numsecs, secsize);
3185 				rf_part_found = 1; /*There is at least one raid partition on this disk*/
3186 		}
3187 
3188 		/*
3189 		 *If there is no raid component on this disk, either in a
3190 		 *disklabel or inside a wedge, check the raw partition as well,
3191 		 *as it is possible to configure raid components on raw disk
3192 		 *devices.
3193 		 */
3194 
3195 		if (!rf_part_found) {
3196 			char cname[sizeof(ac_list->devname)];
3197 
3198 			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
3199 			if (bdevvp(dev, &vp))
3200 				panic("RAID can't alloc vnode");
3201 
3202 			error = VOP_OPEN(vp, FREAD, NOCRED);
3203 			if (error) {
3204 				/* Whatever... */
3205 				vput(vp);
3206 				continue;
3207 			}
3208 			snprintf(cname, sizeof(cname), "%s%c",
3209 			    device_xname(dv), 'a' + RAW_PART);
3210 			ac_list = rf_get_component(ac_list, dev, vp, cname,
3211 				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
3212 		}
3213 	}
3214 	deviter_release(&di);
3215 	return ac_list;
3216 }
3217 
3218 
3219 int
3220 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3221 {
3222 
3223 	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3224 	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3225 	    ((clabel->clean == RF_RAID_CLEAN) ||
3226 	     (clabel->clean == RF_RAID_DIRTY)) &&
3227 	    clabel->row >=0 &&
3228 	    clabel->column >= 0 &&
3229 	    clabel->num_rows > 0 &&
3230 	    clabel->num_columns > 0 &&
3231 	    clabel->row < clabel->num_rows &&
3232 	    clabel->column < clabel->num_columns &&
3233 	    clabel->blockSize > 0 &&
3234 	    /*
3235 	     * numBlocksHi may contain garbage, but it is ok since
3236 	     * the type is unsigned.  If it is really garbage,
3237 	     * rf_fix_old_label_size() will fix it.
3238 	     */
3239 	    rf_component_label_numblocks(clabel) > 0) {
3240 		/*
3241 		 * label looks reasonable enough...
3242 		 * let's make sure it has no old garbage.
3243 		 */
3244 		if (numsecs)
3245 			rf_fix_old_label_size(clabel, numsecs);
3246 		return(1);
3247 	}
3248 	return(0);
3249 }
3250 
3251 
3252 /*
3253  * For reasons yet unknown, some old component labels have garbage in
3254  * the newer numBlocksHi region, and this causes lossage.  Since those
3255  * disks will also have numsecs set to less than 32 bits of sectors,
3256  * we can determine when this corruption has occurred, and fix it.
3257  *
3258  * The exact same problem, with the same unknown reason, happens to
3259  * the partitionSizeHi member as well.
3260  */
3261 static void
3262 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3263 {
3264 
3265 	if (numsecs < ((uint64_t)1 << 32)) {
3266 		if (clabel->numBlocksHi) {
3267 			printf("WARNING: total sectors < 32 bits, yet "
3268 			       "numBlocksHi set\n"
3269 			       "WARNING: resetting numBlocksHi to zero.\n");
3270 			clabel->numBlocksHi = 0;
3271 		}
3272 
3273 		if (clabel->partitionSizeHi) {
3274 			printf("WARNING: total sectors < 32 bits, yet "
3275 			       "partitionSizeHi set\n"
3276 			       "WARNING: resetting partitionSizeHi to zero.\n");
3277 			clabel->partitionSizeHi = 0;
3278 		}
3279 	}
3280 }
3281 
3282 
#ifdef DEBUG
/*
 * Dump the contents of a component label to the console.
 * Debug builds only.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;

	/* Combine the 32-bit lo/hi halves into the full block count. */
	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Contains root partition: %s\n",
	       clabel->root_partition ? "Yes" : "No");
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	   printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
3313 
3314 RF_ConfigSet_t *
3315 rf_create_auto_sets(RF_AutoConfig_t *ac_list)
3316 {
3317 	RF_AutoConfig_t *ac;
3318 	RF_ConfigSet_t *config_sets;
3319 	RF_ConfigSet_t *cset;
3320 	RF_AutoConfig_t *ac_next;
3321 
3322 
3323 	config_sets = NULL;
3324 
3325 	/* Go through the AutoConfig list, and figure out which components
3326 	   belong to what sets.  */
3327 	ac = ac_list;
3328 	while(ac!=NULL) {
3329 		/* we're going to putz with ac->next, so save it here
3330 		   for use at the end of the loop */
3331 		ac_next = ac->next;
3332 
3333 		if (config_sets == NULL) {
3334 			/* will need at least this one... */
3335 			config_sets = (RF_ConfigSet_t *)
3336 				malloc(sizeof(RF_ConfigSet_t),
3337 				       M_RAIDFRAME, M_NOWAIT);
3338 			if (config_sets == NULL) {
3339 				panic("rf_create_auto_sets: No memory!");
3340 			}
3341 			/* this one is easy :) */
3342 			config_sets->ac = ac;
3343 			config_sets->next = NULL;
3344 			config_sets->rootable = 0;
3345 			ac->next = NULL;
3346 		} else {
3347 			/* which set does this component fit into? */
3348 			cset = config_sets;
3349 			while(cset!=NULL) {
3350 				if (rf_does_it_fit(cset, ac)) {
3351 					/* looks like it matches... */
3352 					ac->next = cset->ac;
3353 					cset->ac = ac;
3354 					break;
3355 				}
3356 				cset = cset->next;
3357 			}
3358 			if (cset==NULL) {
3359 				/* didn't find a match above... new set..*/
3360 				cset = (RF_ConfigSet_t *)
3361 					malloc(sizeof(RF_ConfigSet_t),
3362 					       M_RAIDFRAME, M_NOWAIT);
3363 				if (cset == NULL) {
3364 					panic("rf_create_auto_sets: No memory!");
3365 				}
3366 				cset->ac = ac;
3367 				ac->next = NULL;
3368 				cset->next = config_sets;
3369 				cset->rootable = 0;
3370 				config_sets = cset;
3371 			}
3372 		}
3373 		ac = ac_next;
3374 	}
3375 
3376 
3377 	return(config_sets);
3378 }
3379 
3380 static int
3381 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
3382 {
3383 	RF_ComponentLabel_t *clabel1, *clabel2;
3384 
3385 	/* If this one matches the *first* one in the set, that's good
3386 	   enough, since the other members of the set would have been
3387 	   through here too... */
3388 	/* note that we are not checking partitionSize here..
3389 
3390 	   Note that we are also not checking the mod_counters here.
3391 	   If everything else matches except the mod_counter, that's
3392 	   good enough for this test.  We will deal with the mod_counters
3393 	   a little later in the autoconfiguration process.
3394 
3395 	    (clabel1->mod_counter == clabel2->mod_counter) &&
3396 
3397 	   The reason we don't check for this is that failed disks
3398 	   will have lower modification counts.  If those disks are
3399 	   not added to the set they used to belong to, then they will
3400 	   form their own set, which may result in 2 different sets,
3401 	   for example, competing to be configured at raid0, and
3402 	   perhaps competing to be the root filesystem set.  If the
3403 	   wrong ones get configured, or both attempt to become /,
3404 	   weird behaviour and or serious lossage will occur.  Thus we
3405 	   need to bring them into the fold here, and kick them out at
3406 	   a later point.
3407 
3408 	*/
3409 
3410 	clabel1 = cset->ac->clabel;
3411 	clabel2 = ac->clabel;
3412 	if ((clabel1->version == clabel2->version) &&
3413 	    (clabel1->serial_number == clabel2->serial_number) &&
3414 	    (clabel1->num_rows == clabel2->num_rows) &&
3415 	    (clabel1->num_columns == clabel2->num_columns) &&
3416 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
3417 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3418 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3419 	    (clabel1->parityConfig == clabel2->parityConfig) &&
3420 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3421 	    (clabel1->blockSize == clabel2->blockSize) &&
3422 	    rf_component_label_numblocks(clabel1) ==
3423 	    rf_component_label_numblocks(clabel2) &&
3424 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
3425 	    (clabel1->root_partition == clabel2->root_partition) &&
3426 	    (clabel1->last_unit == clabel2->last_unit) &&
3427 	    (clabel1->config_order == clabel2->config_order)) {
3428 		/* if it get's here, it almost *has* to be a match */
3429 	} else {
3430 		/* it's not consistent with somebody in the set..
3431 		   punt */
3432 		return(0);
3433 	}
3434 	/* all was fine.. it must fit... */
3435 	return(1);
3436 }
3437 
/*
 * Decide whether config set 'cset' still has enough live components to
 * be configured.  Returns 1 if so, 0 if too many components are missing.
 *
 * "Live" means: a component exists for the column AND its mod_counter
 * equals the highest mod_counter seen in the set (stale components are
 * treated as missing).  RAID 1 gets special pairwise treatment; other
 * levels use a simple missing-component count.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */
	/* (i.e. the maximum mod_counter over all members of the set.) */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* Look for a current (max mod_counter) component at
		   column c. */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
				/* Just finished the odd half of a mirror
				   pair without bailing.. reset the
				   even_pair_failed flag, and go on to
				   the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* Tolerance: RAID 0 survives no losses; RAID 4/5 survive one. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3540 
/*
 * Build an RF_Config_t from the component labels of autoconfigured set
 * 'ac', suitable for passing to rf_Configure().
 *
 * Note: raidPtr is not referenced in this function body.
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
			RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	/* NB: this also forces the label's num_rows to 1 as a side
	   effect (modern RAIDframe configs are single-row). */
	config->numRow = clabel->num_rows = 1;
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ?? */

	/* Fill in the device name for each column in the set. */
	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	/* Clear out the debug-variable strings. */
	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}
3575 
3576 int
3577 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3578 {
3579 	RF_ComponentLabel_t *clabel;
3580 	int column;
3581 	int sparecol;
3582 
3583 	raidPtr->autoconfigure = new_value;
3584 
3585 	for(column=0; column<raidPtr->numCol; column++) {
3586 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
3587 			clabel = raidget_component_label(raidPtr, column);
3588 			clabel->autoconfigure = new_value;
3589 			raidflush_component_label(raidPtr, column);
3590 		}
3591 	}
3592 	for(column = 0; column < raidPtr->numSpare ; column++) {
3593 		sparecol = raidPtr->numCol + column;
3594 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3595 			clabel = raidget_component_label(raidPtr, sparecol);
3596 			clabel->autoconfigure = new_value;
3597 			raidflush_component_label(raidPtr, sparecol);
3598 		}
3599 	}
3600 	return(new_value);
3601 }
3602 
3603 int
3604 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3605 {
3606 	RF_ComponentLabel_t *clabel;
3607 	int column;
3608 	int sparecol;
3609 
3610 	raidPtr->root_partition = new_value;
3611 	for(column=0; column<raidPtr->numCol; column++) {
3612 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
3613 			clabel = raidget_component_label(raidPtr, column);
3614 			clabel->root_partition = new_value;
3615 			raidflush_component_label(raidPtr, column);
3616 		}
3617 	}
3618 	for(column = 0; column < raidPtr->numSpare ; column++) {
3619 		sparecol = raidPtr->numCol + column;
3620 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3621 			clabel = raidget_component_label(raidPtr, sparecol);
3622 			clabel->root_partition = new_value;
3623 			raidflush_component_label(raidPtr, sparecol);
3624 		}
3625 	}
3626 	return(new_value);
3627 }
3628 
3629 void
3630 rf_release_all_vps(RF_ConfigSet_t *cset)
3631 {
3632 	RF_AutoConfig_t *ac;
3633 
3634 	ac = cset->ac;
3635 	while(ac!=NULL) {
3636 		/* Close the vp, and give it back */
3637 		if (ac->vp) {
3638 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
3639 			VOP_CLOSE(ac->vp, FREAD, NOCRED);
3640 			vput(ac->vp);
3641 			ac->vp = NULL;
3642 		}
3643 		ac = ac->next;
3644 	}
3645 }
3646 
3647 
3648 void
3649 rf_cleanup_config_set(RF_ConfigSet_t *cset)
3650 {
3651 	RF_AutoConfig_t *ac;
3652 	RF_AutoConfig_t *next_ac;
3653 
3654 	ac = cset->ac;
3655 	while(ac!=NULL) {
3656 		next_ac = ac->next;
3657 		/* nuke the label */
3658 		free(ac->clabel, M_RAIDFRAME);
3659 		/* cleanup the config structure */
3660 		free(ac, M_RAIDFRAME);
3661 		/* "next.." */
3662 		ac = next_ac;
3663 	}
3664 	/* and, finally, nuke the config set */
3665 	free(cset, M_RAIDFRAME);
3666 }
3667 
3668 
/*
 * Initialize a component label from the current state of the RAID set:
 * version, serial/mod counters, geometry, size, and configuration flags
 * are all copied from raidPtr.  The label is marked dirty and optimal.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	/* splits the 64-bit sector count into the lo/hi label fields */
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3701 
/*
 * Autoconfigure one config set: pick a raid unit, build an RF_Config_t
 * from the component labels, and configure the array.  On success *unit
 * is set to the configured raid unit and 0 is returned; otherwise a
 * non-zero value is returned.
 */
int
rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	int retcode;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	retcode = 0;
	*unit = -1;

	/* 1. Create a config structure */

	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
				       M_RAIDFRAME,
				       M_NOWAIT);
	if (config==NULL) {
		printf("Out of mem!?!?\n");
				/* XXX do something more intelligent here. */
		return(1);
	}

	memset(config, 0, sizeof(RF_Config_t));

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	if ((raidID < 0) || (raidID >= numraid)) {
		/* let's not wander off into lala land. */
		raidID = numraid - 1;
	}
	/* NOTE(review): if numraid were 0, raidID would be -1 here and
	   raidPtrs[-1] would be dereferenced below -- this assumes
	   numraid >= 1; confirm against the attach path. */
	if (raidPtrs[raidID]->valid != 0) {

		/*
		   Nope... Go looking for an alternative...
		   Start high so we don't immediately use raid0 if that's
		   not taken.
		*/

		for(raidID = numraid - 1; raidID >= 0; raidID--) {
			if (raidPtrs[raidID]->valid == 0) {
				/* can use this one! */
				break;
			}
		}
	}

	if (raidID < 0) {
		/* punt... */
		printf("Unable to auto configure this set!\n");
		printf("(Out of RAID devs!)\n");
		free(config, M_RAIDFRAME);
		return(1);
	}

#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = raidPtrs[raidID];

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	retcode = rf_Configure(raidPtr, config, cset->ac);

	if (retcode == 0) {
		/* configuration worked: bring up the disk device and
		   mark all components dirty until parity is known good */
		raidinit(raidPtrs[raidID]);

		rf_markalldirty(raidPtrs[raidID]);
		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
		if (cset->ac->clabel->root_partition==1) {
			/* everything configured just fine.  Make a note
			   that this set is eligible to be root. */
			cset->rootable = 1;
			/* XXX do this here? */
			raidPtrs[raidID]->root_partition = 1;
		}
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);

	*unit = raidID;
	return(retcode);
}
3802 
3803 void
3804 rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3805 {
3806 	struct buf *bp;
3807 
3808 	bp = (struct buf *)desc->bp;
3809 	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3810 	    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
3811 }
3812 
/*
 * Initialize a RAIDframe memory pool: xmin items are pre-allocated
 * (primed) and kept as the low-water mark; xmax is the high-water mark.
 * w_chan is the wait channel name shown while sleeping on the pool.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
3822 
3823 /*
3824  * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
3825  * if there is IO pending and if that IO could possibly be done for a
3826  * given RAID set.  Returns 0 if IO is waiting and can be done, 1
3827  * otherwise.
3828  *
3829  */
3830 
3831 int
3832 rf_buf_queue_check(int raidid)
3833 {
3834 	if ((bufq_peek(raid_softc[raidid].buf_queue) != NULL) &&
3835 	    raidPtrs[raidid]->openings > 0) {
3836 		/* there is work to do */
3837 		return 0;
3838 	}
3839 	/* default is nothing to do */
3840 	return 1;
3841 }
3842 
3843 int
3844 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
3845 {
3846 	uint64_t numsecs;
3847 	unsigned secsize;
3848 	int error;
3849 
3850 	error = getdisksize(vp, &numsecs, &secsize);
3851 	if (error == 0) {
3852 		diskPtr->blockSize = secsize;
3853 		diskPtr->numBlocks = numsecs - rf_protectedSectors;
3854 		diskPtr->partitionSize = numsecs;
3855 		return 0;
3856 	}
3857 	return error;
3858 }
3859 
/* autoconf match: raid pseudo-devices always match. */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3865 
/* autoconf attach: nothing to do; real setup happens at configure time. */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}
3871 
3872 
3873 static int
3874 raid_detach(device_t self, int flags)
3875 {
3876 	int error;
3877 	struct raid_softc *rs = &raid_softc[device_unit(self)];
3878 
3879 	if ((error = raidlock(rs)) != 0)
3880 		return (error);
3881 
3882 	error = raid_detach_unlocked(rs);
3883 
3884 	raidunlock(rs);
3885 
3886 	return error;
3887 }
3888 
/*
 * Publish a synthetic disk geometry for the raid unit via proplib:
 * one "track" per data stripe and 4*numCol "tracks" per cylinder.
 * The dictionary is attached to both the device properties and the
 * disk structure; any previously attached disk-info is released.
 */
static void
rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	prop_dictionary_t disk_info, odisk_info, geom;
	disk_info = prop_dictionary_create();
	geom = prop_dictionary_create();
	prop_dictionary_set_uint64(geom, "sectors-per-unit",
				   raidPtr->totalSectors);
	prop_dictionary_set_uint32(geom, "sector-size",
				   raidPtr->bytesPerSector);

	prop_dictionary_set_uint16(geom, "sectors-per-track",
				   raidPtr->Layout.dataSectorsPerStripe);
	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
				   4 * raidPtr->numCol);

	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
	   raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
	   (4 * raidPtr->numCol)));

	/* disk_info retains geom; drop our reference to it. */
	prop_dictionary_set(disk_info, "geometry", geom);
	prop_object_release(geom);
	prop_dictionary_set(device_properties(rs->sc_dev),
			    "disk-info", disk_info);
	/* swap in the new dictionary and release the old one, if any */
	odisk_info = rs->sc_dkdev.dk_info;
	rs->sc_dkdev.dk_info = disk_info;
	if (odisk_info)
		prop_object_release(odisk_info);
}
3918 
3919 /*
3920  * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3921  * We end up returning whatever error was returned by the first cache flush
3922  * that fails.
3923  */
3924 
3925 int
3926 rf_sync_component_caches(RF_Raid_t *raidPtr)
3927 {
3928 	int c, sparecol;
3929 	int e,error;
3930 	int force = 1;
3931 
3932 	error = 0;
3933 	for (c = 0; c < raidPtr->numCol; c++) {
3934 		if (raidPtr->Disks[c].status == rf_ds_optimal) {
3935 			e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3936 					  &force, FWRITE, NOCRED);
3937 			if (e) {
3938 				if (e != ENODEV)
3939 					printf("raid%d: cache flush to component %s failed.\n",
3940 					       raidPtr->raidid, raidPtr->Disks[c].devname);
3941 				if (error == 0) {
3942 					error = e;
3943 				}
3944 			}
3945 		}
3946 	}
3947 
3948 	for( c = 0; c < raidPtr->numSpare ; c++) {
3949 		sparecol = raidPtr->numCol + c;
3950 		/* Need to ensure that the reconstruct actually completed! */
3951 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3952 			e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3953 					  DIOCCACHESYNC, &force, FWRITE, NOCRED);
3954 			if (e) {
3955 				if (e != ENODEV)
3956 					printf("raid%d: cache flush to component %s failed.\n",
3957 					       raidPtr->raidid, raidPtr->Disks[sparecol].devname);
3958 				if (error == 0) {
3959 					error = e;
3960 				}
3961 			}
3962 		}
3963 	}
3964 	return error;
3965 }
3966