xref: /netbsd-src/sys/dev/raidframe/rf_disks.c (revision 21e37cc72a480a47828990a439cde7ac9ffaf0c6)
1 /*	$NetBSD: rf_disks.c,v 1.53 2004/05/22 20:56:52 oster Exp $	*/
2 /*-
3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to The NetBSD Foundation
7  * by Greg Oster
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *        This product includes software developed by the NetBSD
20  *        Foundation, Inc. and its contributors.
21  * 4. Neither the name of The NetBSD Foundation nor the names of its
22  *    contributors may be used to endorse or promote products derived
23  *    from this software without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 /*
39  * Copyright (c) 1995 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Author: Mark Holland
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  */
64 
65 /***************************************************************
66  * rf_disks.c -- code to perform operations on the actual disks
67  ***************************************************************/
68 
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.53 2004/05/22 20:56:52 oster Exp $");
71 
72 #include <dev/raidframe/raidframevar.h>
73 
74 #include "rf_raid.h"
75 #include "rf_alloclist.h"
76 #include "rf_utils.h"
77 #include "rf_general.h"
78 #include "rf_options.h"
79 #include "rf_kintf.h"
80 #include "rf_netbsd.h"
81 
82 #include <sys/param.h>
83 #include <sys/systm.h>
84 #include <sys/proc.h>
85 #include <sys/ioctl.h>
86 #include <sys/fcntl.h>
87 #include <sys/vnode.h>
88 
89 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
90 static void rf_print_label_status( RF_Raid_t *, int, char *,
91 				  RF_ComponentLabel_t *);
92 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
93 				  RF_ComponentLabel_t *, int, int );
94 
/*
 * Debug printf wrappers: hand the format string plus five (DPRINTF6) or
 * six (DPRINTF7) arguments to printf(), but only when rf_diskDebug is
 * non-zero.
 *
 * The bodies are wrapped in do { } while (0) so that every use is exactly
 * one statement; with a bare `if' a following `else' at the call site
 * would silently attach to the macro's own `if'.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
97 
98 /**************************************************************************
99  *
100  * initialize the disks comprising the array
101  *
102  * We want the spare disks to have regular row,col numbers so that we can
103  * easily substitue a spare for a failed disk.  But, the driver code assumes
104  * throughout that the array contains numRow by numCol _non-spare_ disks, so
105  * it's not clear how to fit in the spares.  This is an unfortunate holdover
106  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
107  * rest, and put all the spares in it.  This probably needs to get changed
108  * eventually.
109  *
110  **************************************************************************/
111 
112 int
113 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
114 		  RF_Config_t *cfgPtr)
115 {
116 	RF_RaidDisk_t *disks;
117 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
118 	RF_RowCol_t c;
119 	int bs, ret;
120 	unsigned i, count, foundone = 0, numFailuresThisRow;
121 	int force;
122 
123 	force = cfgPtr->force;
124 
125 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
126 	if (ret)
127 		goto fail;
128 
129 	disks = raidPtr->Disks;
130 
131 	numFailuresThisRow = 0;
132 	for (c = 0; c < raidPtr->numCol; c++) {
133 		ret = rf_ConfigureDisk(raidPtr,
134 				       &cfgPtr->devnames[0][c][0],
135 				       &disks[c], c);
136 
137 		if (ret)
138 			goto fail;
139 
140 		if (disks[c].status == rf_ds_optimal) {
141 			raidread_component_label(
142 						 raidPtr->raid_cinfo[c].ci_dev,
143 						 raidPtr->raid_cinfo[c].ci_vp,
144 						 &raidPtr->raid_cinfo[c].ci_label);
145 		}
146 
147 		if (disks[c].status != rf_ds_optimal) {
148 			numFailuresThisRow++;
149 		} else {
150 			if (disks[c].numBlocks < min_numblks)
151 				min_numblks = disks[c].numBlocks;
152 			DPRINTF6("Disk at col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
153 				 c, disks[c].devname,
154 				 (long int) disks[c].numBlocks,
155 				 disks[c].blockSize,
156 				 (long int) disks[c].numBlocks *
157 				 disks[c].blockSize / 1024 / 1024);
158 		}
159 	}
160 	/* XXX fix for n-fault tolerant */
161 	/* XXX this should probably check to see how many failures
162 	   we can handle for this configuration! */
163 	if (numFailuresThisRow > 0)
164 		raidPtr->status = rf_rs_degraded;
165 
166 	/* all disks must be the same size & have the same block size, bs must
167 	 * be a power of 2 */
168 	bs = 0;
169 	foundone = 0;
170 	for (c = 0; c < raidPtr->numCol; c++) {
171 		if (disks[c].status == rf_ds_optimal) {
172 			bs = disks[c].blockSize;
173 			foundone = 1;
174 			break;
175 		}
176 	}
177 	if (!foundone) {
178 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
179 		ret = EINVAL;
180 		goto fail;
181 	}
182 	for (count = 0, i = 1; i; i <<= 1)
183 		if (bs & i)
184 			count++;
185 	if (count != 1) {
186 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
187 		ret = EINVAL;
188 		goto fail;
189 	}
190 
191 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
192 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
193 		if (force != 0) {
194 			printf("raid%d: Fatal errors being ignored.\n",
195 			       raidPtr->raidid);
196 		} else {
197 			ret = EINVAL;
198 			goto fail;
199 		}
200 	}
201 
202 	for (c = 0; c < raidPtr->numCol; c++) {
203 		if (disks[c].status == rf_ds_optimal) {
204 			if (disks[c].blockSize != bs) {
205 				RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c);
206 				ret = EINVAL;
207 				goto fail;
208 			}
209 			if (disks[c].numBlocks != min_numblks) {
210 				RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n",
211 					     c, (int) min_numblks);
212 				disks[c].numBlocks = min_numblks;
213 			}
214 		}
215 	}
216 
217 	raidPtr->sectorsPerDisk = min_numblks;
218 	raidPtr->logBytesPerSector = ffs(bs) - 1;
219 	raidPtr->bytesPerSector = bs;
220 	raidPtr->sectorMask = bs - 1;
221 	return (0);
222 
223 fail:
224 
225 	rf_UnconfigureVnodes( raidPtr );
226 
227 	return (ret);
228 }
229 
230 
231 /****************************************************************************
232  * set up the data structures describing the spare disks in the array
233  * recall from the above comment that the spare disk descriptors are stored
234  * in row zero, which is specially expanded to hold them.
235  ****************************************************************************/
236 int
237 rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
238 		       RF_Config_t *cfgPtr)
239 {
240 	int     i, ret;
241 	unsigned int bs;
242 	RF_RaidDisk_t *disks;
243 	int     num_spares_done;
244 
245 	num_spares_done = 0;
246 
247 	/* The space for the spares should have already been allocated by
248 	 * ConfigureDisks() */
249 
250 	disks = &raidPtr->Disks[raidPtr->numCol];
251 	for (i = 0; i < raidPtr->numSpare; i++) {
252 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
253 				       &disks[i], raidPtr->numCol + i);
254 		if (ret)
255 			goto fail;
256 		if (disks[i].status != rf_ds_optimal) {
257 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
258 				     &cfgPtr->spare_names[i][0]);
259 		} else {
260 			disks[i].status = rf_ds_spare;	/* change status to
261 							 * spare */
262 			DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
263 			    disks[i].devname,
264 			    (long int) disks[i].numBlocks, disks[i].blockSize,
265 			    (long int) disks[i].numBlocks *
266 				 disks[i].blockSize / 1024 / 1024);
267 		}
268 		num_spares_done++;
269 	}
270 
271 	/* check sizes and block sizes on spare disks */
272 	bs = 1 << raidPtr->logBytesPerSector;
273 	for (i = 0; i < raidPtr->numSpare; i++) {
274 		if (disks[i].blockSize != bs) {
275 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
276 			ret = EINVAL;
277 			goto fail;
278 		}
279 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
280 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
281 				     disks[i].devname, disks[i].blockSize,
282 				     (long int) raidPtr->sectorsPerDisk);
283 			ret = EINVAL;
284 			goto fail;
285 		} else
286 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
287 				RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
288 
289 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
290 			}
291 	}
292 
293 	return (0);
294 
295 fail:
296 
297 	/* Release the hold on the main components.  We've failed to allocate
298 	 * a spare, and since we're failing, we need to free things..
299 
300 	 XXX failing to allocate a spare is *not* that big of a deal...
301 	 We *can* survive without it, if need be, esp. if we get hot
302 	 adding working.
303 
304 	 If we don't fail out here, then we need a way to remove this spare...
305 	 that should be easier to do here than if we are "live"...
306 
307 	 */
308 
309 	rf_UnconfigureVnodes( raidPtr );
310 
311 	return (ret);
312 }
313 
314 static int
315 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
316 {
317 	int ret;
318 
319 	/* We allocate RF_MAXSPARE on the first row so that we
320 	   have room to do hot-swapping of spares */
321 	RF_MallocAndAdd(raidPtr->Disks, (raidPtr->numCol + RF_MAXSPARE) *
322 			sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
323 			raidPtr->cleanupList);
324 	if (raidPtr->Disks == NULL) {
325 		ret = ENOMEM;
326 		goto fail;
327 	}
328 
329 	/* get space for device specific stuff.. */
330 	RF_MallocAndAdd(raidPtr->raid_cinfo,
331 			(raidPtr->numCol + RF_MAXSPARE) *
332 			sizeof(struct raidcinfo), (struct raidcinfo *),
333 			raidPtr->cleanupList);
334 
335 	if (raidPtr->raid_cinfo == NULL) {
336 		ret = ENOMEM;
337 		goto fail;
338 	}
339 
340 	return(0);
341 fail:
342 	rf_UnconfigureVnodes( raidPtr );
343 
344 	return(ret);
345 }
346 
347 
348 /* configure a single disk during auto-configuration at boot */
349 int
350 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
351 		      RF_AutoConfig_t *auto_config)
352 {
353 	RF_RaidDisk_t *disks;
354 	RF_RaidDisk_t *diskPtr;
355 	RF_RowCol_t c;
356 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
357 	int bs, ret;
358 	int numFailuresThisRow;
359 	RF_AutoConfig_t *ac;
360 	int parity_good;
361 	int mod_counter;
362 	int mod_counter_found;
363 
364 #if DEBUG
365 	printf("Starting autoconfiguration of RAID set...\n");
366 #endif
367 
368 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
369 	if (ret)
370 		goto fail;
371 
372 	disks = raidPtr->Disks;
373 
374 	/* assume the parity will be fine.. */
375 	parity_good = RF_RAID_CLEAN;
376 
377 	/* Check for mod_counters that are too low */
378 	mod_counter_found = 0;
379 	mod_counter = 0;
380 	ac = auto_config;
381 	while(ac!=NULL) {
382 		if (mod_counter_found==0) {
383 			mod_counter = ac->clabel->mod_counter;
384 			mod_counter_found = 1;
385 		} else {
386 			if (ac->clabel->mod_counter > mod_counter) {
387 				mod_counter = ac->clabel->mod_counter;
388 			}
389 		}
390 		ac->flag = 0; /* clear the general purpose flag */
391 		ac = ac->next;
392 	}
393 
394 	bs = 0;
395 
396 	numFailuresThisRow = 0;
397 	for (c = 0; c < raidPtr->numCol; c++) {
398 		diskPtr = &disks[c];
399 
400 		/* find this row/col in the autoconfig */
401 #if DEBUG
402 		printf("Looking for %d in autoconfig\n",c);
403 #endif
404 		ac = auto_config;
405 		while(ac!=NULL) {
406 			if (ac->clabel==NULL) {
407 				/* big-time bad news. */
408 				goto fail;
409 			}
410 			if ((ac->clabel->column == c) &&
411 			    (ac->clabel->mod_counter == mod_counter)) {
412 				/* it's this one... */
413 				/* flag it as 'used', so we don't
414 				   free it later. */
415 				ac->flag = 1;
416 #if DEBUG
417 				printf("Found: %s at %d\n",
418 				       ac->devname,c);
419 #endif
420 
421 				break;
422 			}
423 			ac=ac->next;
424 		}
425 
426 		if (ac==NULL) {
427 			/* we didn't find an exact match with a
428 			   correct mod_counter above... can we find
429 			   one with an incorrect mod_counter to use
430 			   instead?  (this one, if we find it, will be
431 			   marked as failed once the set configures)
432 			*/
433 
434 			ac = auto_config;
435 			while(ac!=NULL) {
436 				if (ac->clabel==NULL) {
437 					/* big-time bad news. */
438 					goto fail;
439 				}
440 				if (ac->clabel->column == c) {
441 					/* it's this one...
442 					   flag it as 'used', so we
443 					   don't free it later. */
444 					ac->flag = 1;
445 #if DEBUG
446 					printf("Found(low mod_counter): %s at %d\n",
447 					       ac->devname,c);
448 #endif
449 
450 					break;
451 				}
452 				ac=ac->next;
453 			}
454 		}
455 
456 
457 
458 		if (ac!=NULL) {
459 			/* Found it.  Configure it.. */
460 			diskPtr->blockSize = ac->clabel->blockSize;
461 			diskPtr->numBlocks = ac->clabel->numBlocks;
462 			/* Note: rf_protectedSectors is already
463 			   factored into numBlocks here */
464 			raidPtr->raid_cinfo[c].ci_vp = ac->vp;
465 			raidPtr->raid_cinfo[c].ci_dev = ac->dev;
466 
467 			memcpy(&raidPtr->raid_cinfo[c].ci_label,
468 			    ac->clabel, sizeof(*ac->clabel));
469 			snprintf(diskPtr->devname, sizeof(diskPtr->devname),
470 			    "/dev/%s", ac->devname);
471 
472 			/* note the fact that this component was
473 			   autoconfigured.  You'll need this info
474 			   later.  Trust me :) */
475 			diskPtr->auto_configured = 1;
476 			diskPtr->dev = ac->dev;
477 
478 			/*
479 			 * we allow the user to specify that
480 			 * only a fraction of the disks should
481 			 * be used this is just for debug: it
482 			 * speeds up the parity scan
483 			 */
484 
485 			diskPtr->numBlocks = diskPtr->numBlocks *
486 				rf_sizePercentage / 100;
487 
488 			/* XXX these will get set multiple times,
489 			   but since we're autoconfiguring, they'd
490 			   better be always the same each time!
491 			   If not, this is the least of your worries */
492 
493 			bs = diskPtr->blockSize;
494 			min_numblks = diskPtr->numBlocks;
495 
496 			/* this gets done multiple times, but that's
497 			   fine -- the serial number will be the same
498 			   for all components, guaranteed */
499 			raidPtr->serial_number = ac->clabel->serial_number;
500 			/* check the last time the label was modified */
501 
502 			if (ac->clabel->mod_counter != mod_counter) {
503 				/* Even though we've filled in all of
504 				   the above, we don't trust this
505 				   component since it's modification
506 				   counter is not in sync with the
507 				   rest, and we really consider it to
508 				   be failed.  */
509 				disks[c].status = rf_ds_failed;
510 				numFailuresThisRow++;
511 			} else {
512 				if (ac->clabel->clean != RF_RAID_CLEAN) {
513 					parity_good = RF_RAID_DIRTY;
514 				}
515 			}
516 		} else {
517 			/* Didn't find it at all!!  Component must
518 			   really be dead */
519 			disks[c].status = rf_ds_failed;
520 			snprintf(disks[c].devname, sizeof(disks[c].devname),
521 			    "component%d", c);
522 			numFailuresThisRow++;
523 		}
524 	}
525 	/* XXX fix for n-fault tolerant */
526 	/* XXX this should probably check to see how many failures
527 	   we can handle for this configuration! */
528 	if (numFailuresThisRow > 0) {
529 		raidPtr->status = rf_rs_degraded;
530 		raidPtr->numFailures = numFailuresThisRow;
531 	}
532 
533 	/* close the device for the ones that didn't get used */
534 
535 	ac = auto_config;
536 	while(ac!=NULL) {
537 		if (ac->flag == 0) {
538 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
539 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED, 0);
540 			vput(ac->vp);
541 			ac->vp = NULL;
542 #if DEBUG
543 			printf("Released %s from auto-config set.\n",
544 			       ac->devname);
545 #endif
546 		}
547 		ac = ac->next;
548 	}
549 
550 	raidPtr->mod_counter = mod_counter;
551 
552 	/* note the state of the parity, if any */
553 	raidPtr->parity_good = parity_good;
554 	raidPtr->sectorsPerDisk = min_numblks;
555 	raidPtr->logBytesPerSector = ffs(bs) - 1;
556 	raidPtr->bytesPerSector = bs;
557 	raidPtr->sectorMask = bs - 1;
558 	return (0);
559 
560 fail:
561 
562 	rf_UnconfigureVnodes( raidPtr );
563 
564 	return (ret);
565 
566 }
567 
568 /* configure a single disk in the array */
569 int
570 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *buf, RF_RaidDisk_t *diskPtr,
571 		 RF_RowCol_t col)
572 {
573 	char   *p;
574 	struct partinfo dpart;
575 	struct vnode *vp;
576 	struct vattr va;
577 	struct proc *proc;
578 	int     error;
579 
580 	p = rf_find_non_white(buf);
581 	if (p[strlen(p) - 1] == '\n') {
582 		/* strip off the newline */
583 		p[strlen(p) - 1] = '\0';
584 	}
585 	(void) strcpy(diskPtr->devname, p);
586 
587 	proc = raidPtr->engine_thread;
588 
589 	/* Let's start by claiming the component is fine and well... */
590 	diskPtr->status = rf_ds_optimal;
591 
592 	raidPtr->raid_cinfo[col].ci_vp = NULL;
593 	raidPtr->raid_cinfo[col].ci_dev = 0;
594 
595 	if (!strcmp("absent", diskPtr->devname)) {
596 		printf("Ignoring missing component at column %d\n", col);
597 		sprintf(diskPtr->devname, "component%d", col);
598 		diskPtr->status = rf_ds_failed;
599 		return (0);
600 	}
601 
602 	error = raidlookup(diskPtr->devname, proc, &vp);
603 	if (error) {
604 		printf("raidlookup on device: %s failed!\n", diskPtr->devname);
605 		if (error == ENXIO) {
606 			/* the component isn't there... must be dead :-( */
607 			diskPtr->status = rf_ds_failed;
608 		} else {
609 			return (error);
610 		}
611 	}
612 	if (diskPtr->status == rf_ds_optimal) {
613 
614 		if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
615 			return (error);
616 		}
617 		error = VOP_IOCTL(vp, DIOCGPART, &dpart,
618 				  FREAD, proc->p_ucred, proc);
619 		if (error) {
620 			return (error);
621 		}
622 
623 		diskPtr->blockSize = dpart.disklab->d_secsize;
624 
625 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
626 		diskPtr->partitionSize = dpart.part->p_size;
627 
628 		raidPtr->raid_cinfo[col].ci_vp = vp;
629 		raidPtr->raid_cinfo[col].ci_dev = va.va_rdev;
630 
631 		/* This component was not automatically configured */
632 		diskPtr->auto_configured = 0;
633 		diskPtr->dev = va.va_rdev;
634 
635 		/* we allow the user to specify that only a fraction of the
636 		 * disks should be used this is just for debug:  it speeds up
637 		 * the parity scan */
638 		diskPtr->numBlocks = diskPtr->numBlocks *
639 			rf_sizePercentage / 100;
640 	}
641 	return (0);
642 }
643 
644 static void
645 rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name,
646 		      RF_ComponentLabel_t *ci_label)
647 {
648 
649 	printf("raid%d: Component %s being configured at col: %d\n",
650 	       raidPtr->raidid, dev_name, column );
651 	printf("         Column: %d Num Columns: %d\n",
652 	       ci_label->column,
653 	       ci_label->num_columns);
654 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
655 	       ci_label->version, ci_label->serial_number,
656 	       ci_label->mod_counter);
657 	printf("         Clean: %s Status: %d\n",
658 	       ci_label->clean ? "Yes" : "No", ci_label->status );
659 }
660 
661 static int rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column,
662 				 char *dev_name, RF_ComponentLabel_t *ci_label,
663 				 int serial_number, int mod_counter)
664 {
665 	int fatal_error = 0;
666 
667 	if (serial_number != ci_label->serial_number) {
668 		printf("%s has a different serial number: %d %d\n",
669 		       dev_name, serial_number, ci_label->serial_number);
670 		fatal_error = 1;
671 	}
672 	if (mod_counter != ci_label->mod_counter) {
673 		printf("%s has a different modfication count: %d %d\n",
674 		       dev_name, mod_counter, ci_label->mod_counter);
675 	}
676 
677 	if (row != ci_label->row) {
678 		printf("Row out of alignment for: %s\n", dev_name);
679 		fatal_error = 1;
680 	}
681 	if (column != ci_label->column) {
682 		printf("Column out of alignment for: %s\n", dev_name);
683 		fatal_error = 1;
684 	}
685 	if (raidPtr->numCol != ci_label->num_columns) {
686 		printf("Number of columns do not match for: %s\n", dev_name);
687 		fatal_error = 1;
688 	}
689 	if (ci_label->clean == 0) {
690 		/* it's not clean, but that's not fatal */
691 		printf("%s is not clean!\n", dev_name);
692 	}
693 	return(fatal_error);
694 }
695 
696 
697 /*
698 
699    rf_CheckLabels() - check all the component labels for consistency.
700    Return an error if there is anything major amiss.
701 
702  */
703 
704 int
705 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
706 {
707 	int c;
708 	char *dev_name;
709 	RF_ComponentLabel_t *ci_label;
710 	int serial_number = 0;
711 	int mod_number = 0;
712 	int fatal_error = 0;
713 	int mod_values[4];
714 	int mod_count[4];
715 	int ser_values[4];
716 	int ser_count[4];
717 	int num_ser;
718 	int num_mod;
719 	int i;
720 	int found;
721 	int hosed_column;
722 	int too_fatal;
723 	int parity_good;
724 	int force;
725 
726 	hosed_column = -1;
727 	too_fatal = 0;
728 	force = cfgPtr->force;
729 
730 	/*
731 	   We're going to try to be a little intelligent here.  If one
732 	   component's label is bogus, and we can identify that it's the
733 	   *only* one that's gone, we'll mark it as "failed" and allow
734 	   the configuration to proceed.  This will be the *only* case
735 	   that we'll proceed if there would be (otherwise) fatal errors.
736 
737 	   Basically we simply keep a count of how many components had
738 	   what serial number.  If all but one agree, we simply mark
739 	   the disagreeing component as being failed, and allow
740 	   things to come up "normally".
741 
742 	   We do this first for serial numbers, and then for "mod_counter".
743 
744 	 */
745 
746 	num_ser = 0;
747 	num_mod = 0;
748 
749 	for (c = 0; c < raidPtr->numCol; c++) {
750 		ci_label = &raidPtr->raid_cinfo[c].ci_label;
751 		found=0;
752 		for(i=0;i<num_ser;i++) {
753 			if (ser_values[i] == ci_label->serial_number) {
754 				ser_count[i]++;
755 				found=1;
756 				break;
757 			}
758 		}
759 		if (!found) {
760 			ser_values[num_ser] = ci_label->serial_number;
761 			ser_count[num_ser] = 1;
762 			num_ser++;
763 			if (num_ser>2) {
764 				fatal_error = 1;
765 				break;
766 			}
767 		}
768 		found=0;
769 		for(i=0;i<num_mod;i++) {
770 			if (mod_values[i] == ci_label->mod_counter) {
771 				mod_count[i]++;
772 				found=1;
773 				break;
774 			}
775 		}
776 		if (!found) {
777 			mod_values[num_mod] = ci_label->mod_counter;
778 			mod_count[num_mod] = 1;
779 			num_mod++;
780 			if (num_mod>2) {
781 				fatal_error = 1;
782 				break;
783 			}
784 		}
785 	}
786 #if DEBUG
787 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
788 	for(i=0;i<num_ser;i++) {
789 		printf("%d %d\n", ser_values[i], ser_count[i]);
790 	}
791 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
792 	for(i=0;i<num_mod;i++) {
793 		printf("%d %d\n", mod_values[i], mod_count[i]);
794 	}
795 #endif
796 	serial_number = ser_values[0];
797 	if (num_ser == 2) {
798 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
799 			/* Locate the maverick component */
800 			if (ser_count[1] > ser_count[0]) {
801 				serial_number = ser_values[1];
802 			}
803 
804 			for (c = 0; c < raidPtr->numCol; c++) {
805 				ci_label = &raidPtr->raid_cinfo[c].ci_label;
806 				if (serial_number != ci_label->serial_number) {
807 					hosed_column = c;
808 					break;
809 				}
810 			}
811 			printf("Hosed component: %s\n",
812 			       &cfgPtr->devnames[0][hosed_column][0]);
813 			if (!force) {
814 				/* we'll fail this component, as if there are
815 				   other major errors, we arn't forcing things
816 				   and we'll abort the config anyways */
817 				raidPtr->Disks[hosed_column].status
818 					= rf_ds_failed;
819 				raidPtr->numFailures++;
820 				raidPtr->status = rf_rs_degraded;
821 			}
822 		} else {
823 			too_fatal = 1;
824 		}
825 		if (cfgPtr->parityConfig == '0') {
826 			/* We've identified two different serial numbers.
827 			   RAID 0 can't cope with that, so we'll punt */
828 			too_fatal = 1;
829 		}
830 
831 	}
832 
833 	/* record the serial number for later.  If we bail later, setting
834 	   this doesn't matter, otherwise we've got the best guess at the
835 	   correct serial number */
836 	raidPtr->serial_number = serial_number;
837 
838 	mod_number = mod_values[0];
839 	if (num_mod == 2) {
840 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
841 			/* Locate the maverick component */
842 			if (mod_count[1] > mod_count[0]) {
843 				mod_number = mod_values[1];
844 			} else if (mod_count[1] < mod_count[0]) {
845 				mod_number = mod_values[0];
846 			} else {
847 				/* counts of different modification values
848 				   are the same.   Assume greater value is
849 				   the correct one, all other things
850 				   considered */
851 				if (mod_values[0] > mod_values[1]) {
852 					mod_number = mod_values[0];
853 				} else {
854 					mod_number = mod_values[1];
855 				}
856 
857 			}
858 
859 			for (c = 0; c < raidPtr->numCol; c++) {
860 				ci_label = &raidPtr->raid_cinfo[c].ci_label;
861 				if (mod_number != ci_label->mod_counter) {
862 					if (hosed_column == c) {
863 						/* same one.  Can
864 						   deal with it.  */
865 					} else {
866 						hosed_column = c;
867 						if (num_ser != 1) {
868 							too_fatal = 1;
869 							break;
870 						}
871 					}
872 				}
873 			}
874 			printf("Hosed component: %s\n",
875 			       &cfgPtr->devnames[0][hosed_column][0]);
876 			if (!force) {
877 				/* we'll fail this component, as if there are
878 				   other major errors, we arn't forcing things
879 				   and we'll abort the config anyways */
880 				if (raidPtr->Disks[hosed_column].status != rf_ds_failed) {
881 					raidPtr->Disks[hosed_column].status
882 						= rf_ds_failed;
883 					raidPtr->numFailures++;
884 					raidPtr->status = rf_rs_degraded;
885 				}
886 			}
887 		} else {
888 			too_fatal = 1;
889 		}
890 		if (cfgPtr->parityConfig == '0') {
891 			/* We've identified two different mod counters.
892 			   RAID 0 can't cope with that, so we'll punt */
893 			too_fatal = 1;
894 		}
895 	}
896 
897 	raidPtr->mod_counter = mod_number;
898 
899 	if (too_fatal) {
900 		/* we've had both a serial number mismatch, and a mod_counter
901 		   mismatch -- and they involved two different components!!
902 		   Bail -- make things fail so that the user must force
903 		   the issue... */
904 		hosed_column = -1;
905 	}
906 
907 	if (num_ser > 2) {
908 		printf("raid%d: Too many different serial numbers!\n",
909 		       raidPtr->raidid);
910 	}
911 
912 	if (num_mod > 2) {
913 		printf("raid%d: Too many different mod counters!\n",
914 		       raidPtr->raidid);
915 	}
916 
917 	/* we start by assuming the parity will be good, and flee from
918 	   that notion at the slightest sign of trouble */
919 
920 	parity_good = RF_RAID_CLEAN;
921 
922 	for (c = 0; c < raidPtr->numCol; c++) {
923 		dev_name = &cfgPtr->devnames[0][c][0];
924 		ci_label = &raidPtr->raid_cinfo[c].ci_label;
925 
926 		if (c == hosed_column) {
927 			printf("raid%d: Ignoring %s\n",
928 			       raidPtr->raidid, dev_name);
929 		} else {
930 			rf_print_label_status( raidPtr, c, dev_name, ci_label);
931 			if (rf_check_label_vitals( raidPtr, 0, c,
932 						   dev_name, ci_label,
933 						   serial_number,
934 						   mod_number )) {
935 				fatal_error = 1;
936 			}
937 			if (ci_label->clean != RF_RAID_CLEAN) {
938 				parity_good = RF_RAID_DIRTY;
939 			}
940 		}
941 	}
942 
943 	if (fatal_error) {
944 		parity_good = RF_RAID_DIRTY;
945 	}
946 
947 	/* we note the state of the parity */
948 	raidPtr->parity_good = parity_good;
949 
950 	return(fatal_error);
951 }
952 
953 int
954 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
955 {
956 	RF_RaidDisk_t *disks;
957 	RF_DiskQueue_t *spareQueues;
958 	int ret;
959 	unsigned int bs;
960 	int spare_number;
961 
962 	ret=0;
963 
964 	if (raidPtr->numSpare >= RF_MAXSPARE) {
965 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
966 		return(EINVAL);
967 	}
968 
969 	RF_LOCK_MUTEX(raidPtr->mutex);
970 	while (raidPtr->adding_hot_spare==1) {
971 		ltsleep(&(raidPtr->adding_hot_spare), PRIBIO, "raidhs", 0,
972 			&(raidPtr->mutex));
973 	}
974 	raidPtr->adding_hot_spare=1;
975 	RF_UNLOCK_MUTEX(raidPtr->mutex);
976 
977 	/* the beginning of the spares... */
978 	disks = &raidPtr->Disks[raidPtr->numCol];
979 
980 	spare_number = raidPtr->numSpare;
981 
982 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
983 			       &disks[spare_number],
984 			       raidPtr->numCol + spare_number);
985 
986 	if (ret)
987 		goto fail;
988 	if (disks[spare_number].status != rf_ds_optimal) {
989 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
990 			     sparePtr->component_name);
991 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
992 		ret=EINVAL;
993 		goto fail;
994 	} else {
995 		disks[spare_number].status = rf_ds_spare;
996 		DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
997 			 disks[spare_number].devname,
998 			 (long int) disks[spare_number].numBlocks,
999 			 disks[spare_number].blockSize,
1000 			 (long int) disks[spare_number].numBlocks *
1001 			 disks[spare_number].blockSize / 1024 / 1024);
1002 	}
1003 
1004 
1005 	/* check sizes and block sizes on the spare disk */
1006 	bs = 1 << raidPtr->logBytesPerSector;
1007 	if (disks[spare_number].blockSize != bs) {
1008 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
1009 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
1010 		ret = EINVAL;
1011 		goto fail;
1012 	}
1013 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1014 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
1015 			     disks[spare_number].devname,
1016 			     disks[spare_number].blockSize,
1017 			     (long int) raidPtr->sectorsPerDisk);
1018 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
1019 		ret = EINVAL;
1020 		goto fail;
1021 	} else {
1022 		if (disks[spare_number].numBlocks >
1023 		    raidPtr->sectorsPerDisk) {
1024 			RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
1025 				     (long int) raidPtr->sectorsPerDisk);
1026 
1027 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1028 		}
1029 	}
1030 
1031 	spareQueues = &raidPtr->Queues[raidPtr->numCol];
1032 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1033 				 raidPtr->numCol + spare_number,
1034 				 raidPtr->qType,
1035 				 raidPtr->sectorsPerDisk,
1036 				 raidPtr->Disks[raidPtr->numCol +
1037 						  spare_number].dev,
1038 				 raidPtr->maxOutstanding,
1039 				 &raidPtr->shutdownList,
1040 				 raidPtr->cleanupList);
1041 
1042 	RF_LOCK_MUTEX(raidPtr->mutex);
1043 	raidPtr->numSpare++;
1044 	RF_UNLOCK_MUTEX(raidPtr->mutex);
1045 
1046 fail:
1047 	RF_LOCK_MUTEX(raidPtr->mutex);
1048 	raidPtr->adding_hot_spare=0;
1049 	wakeup(&(raidPtr->adding_hot_spare));
1050 	RF_UNLOCK_MUTEX(raidPtr->mutex);
1051 
1052 	return(ret);
1053 }
1054 
1055 int
1056 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
1057 {
1058 	int spare_number;
1059 
1060 
1061 	if (raidPtr->numSpare==0) {
1062 		printf("No spares to remove!\n");
1063 		return(EINVAL);
1064 	}
1065 
1066 	spare_number = sparePtr->column;
1067 
1068 	return(EINVAL); /* XXX not implemented yet */
1069 #if 0
1070 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1071 		return(EINVAL);
1072 	}
1073 
1074 	/* verify that this spare isn't in use... */
1075 
1076 
1077 
1078 
1079 	/* it's gone.. */
1080 
1081 	raidPtr->numSpare--;
1082 
1083 	return(0);
1084 #endif
1085 }
1086 
1087 
1088 int
1089 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
1090 {
1091 	RF_RaidDisk_t *disks;
1092 
1093 	if ((component->column < 0) ||
1094 	    (component->column >= raidPtr->numCol)) {
1095 		return(EINVAL);
1096 	}
1097 
1098 	disks = &raidPtr->Disks[component->column];
1099 
1100 	/* 1. This component must be marked as 'failed' */
1101 
1102 	return(EINVAL); /* Not implemented yet. */
1103 }
1104 
1105 int
1106 rf_incorporate_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
1107 {
1108 
1109 	/* Issues here include how to 'move' this in if there is IO
1110 	   taking place (e.g. component queues and such) */
1111 
1112 	return(EINVAL); /* Not implemented yet. */
1113 }
1114