xref: /netbsd-src/sys/dev/raidframe/rf_disks.c (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1 /*	$NetBSD: rf_disks.c,v 1.93 2022/08/10 01:16:38 mrg Exp $	*/
2 /*-
3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to The NetBSD Foundation
7  * by Greg Oster
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 /*
32  * Copyright (c) 1995 Carnegie-Mellon University.
33  * All rights reserved.
34  *
35  * Author: Mark Holland
36  *
37  * Permission to use, copy, modify and distribute this software and
38  * its documentation is hereby granted, provided that both the copyright
39  * notice and this permission notice appear in all copies of the
40  * software, derivative works or modified versions, and any portions
41  * thereof, and that both notices appear in supporting documentation.
42  *
43  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
44  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
45  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46  *
47  * Carnegie Mellon requests users of this software to return to
48  *
49  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
50  *  School of Computer Science
51  *  Carnegie Mellon University
52  *  Pittsburgh PA 15213-3890
53  *
54  * any improvements or extensions that they make and grant Carnegie the
55  * rights to redistribute these changes.
56  */
57 
58 /***************************************************************
59  * rf_disks.c -- code to perform operations on the actual disks
60  ***************************************************************/
61 
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.93 2022/08/10 01:16:38 mrg Exp $");
64 
65 #include <dev/raidframe/raidframevar.h>
66 
67 #include "rf_raid.h"
68 #include "rf_alloclist.h"
69 #include "rf_utils.h"
70 #include "rf_general.h"
71 #include "rf_options.h"
72 #include "rf_kintf.h"
73 #include "rf_netbsd.h"
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/proc.h>
78 #include <sys/ioctl.h>
79 #include <sys/fcntl.h>
80 #include <sys/vnode.h>
81 #include <sys/namei.h> /* for pathbuf */
82 #include <sys/kauth.h>
83 #include <miscfs/specfs/specdev.h> /* for v_rdev */
84 
85 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
86 static void rf_print_label_status( RF_Raid_t *, int, char *,
87 				  RF_ComponentLabel_t *);
88 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
89 				  RF_ComponentLabel_t *, int, int );
90 
/*
 * Debug printf helpers: emit the message only when rf_diskDebug is non-zero.
 * The do { } while (0) wrapper makes each macro behave as a single statement,
 * so it is safe in an unbraced if/else (a bare `if` would capture the
 * caller's `else`).
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
93 
94 /**************************************************************************
95  *
96  * initialize the disks comprising the array
97  *
98  * We want the spare disks to have regular row,col numbers so that we can
99  * easily substitute a spare for a failed disk.  But, the driver code assumes
100  * throughout that the array contains numRow by numCol _non-spare_ disks, so
101  * it's not clear how to fit in the spares.  This is an unfortunate holdover
102  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
103  * rest, and put all the spares in it.  This probably needs to get changed
104  * eventually.
105  *
106  **************************************************************************/
107 
108 int
109 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
110 		  RF_Config_t *cfgPtr)
111 {
112 	RF_RaidDisk_t *disks;
113 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
114 	RF_RowCol_t c;
115 	int bs, ret;
116 	unsigned i, count, foundone = 0, numFailuresThisRow;
117 	int force;
118 
119 	force = cfgPtr->force;
120 
121 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
122 	if (ret)
123 		goto fail;
124 
125 	disks = raidPtr->Disks;
126 
127 	numFailuresThisRow = 0;
128 	for (c = 0; c < raidPtr->numCol; c++) {
129 		ret = rf_ConfigureDisk(raidPtr,
130 				       &cfgPtr->devnames[0][c][0],
131 				       &disks[c], c);
132 
133 		if (ret)
134 			goto fail;
135 
136 		if (disks[c].status == rf_ds_optimal) {
137 			ret = raidfetch_component_label(raidPtr, c);
138 			if (ret)
139 				goto fail;
140 
141 			/* mark it as failed if the label looks bogus... */
142 			if (!rf_reasonable_label(&raidPtr->raid_cinfo[c].ci_label,0) && !force) {
143 				disks[c].status = rf_ds_failed;
144 			}
145 		}
146 
147 		if (disks[c].status != rf_ds_optimal) {
148 			numFailuresThisRow++;
149 		} else {
150 			if (disks[c].numBlocks < min_numblks)
151 				min_numblks = disks[c].numBlocks;
152 			DPRINTF6("Disk at col %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
153 				 c, disks[c].devname,
154 				 disks[c].numBlocks,
155 				 disks[c].blockSize,
156 				 (long int) disks[c].numBlocks *
157 				 disks[c].blockSize / 1024 / 1024);
158 		}
159 	}
160 	/* XXX fix for n-fault tolerant */
161 	/* XXX this should probably check to see how many failures
162 	   we can handle for this configuration! */
163 	if (numFailuresThisRow > 0)
164 		raidPtr->status = rf_rs_degraded;
165 
166 	/* all disks must be the same size & have the same block size, bs must
167 	 * be a power of 2 */
168 	bs = 0;
169 	foundone = 0;
170 	for (c = 0; c < raidPtr->numCol; c++) {
171 		if (disks[c].status == rf_ds_optimal) {
172 			bs = disks[c].blockSize;
173 			foundone = 1;
174 			break;
175 		}
176 	}
177 	if (!foundone) {
178 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
179 		ret = EINVAL;
180 		goto fail;
181 	}
182 	for (count = 0, i = 1; i; i <<= 1)
183 		if (bs & i)
184 			count++;
185 	if (count != 1) {
186 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
187 		ret = EINVAL;
188 		goto fail;
189 	}
190 
191 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
192 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
193 		if (force != 0) {
194 			printf("raid%d: Fatal errors being ignored.\n",
195 			       raidPtr->raidid);
196 		} else {
197 			ret = EINVAL;
198 			goto fail;
199 		}
200 	}
201 
202 	for (c = 0; c < raidPtr->numCol; c++) {
203 		if (disks[c].status == rf_ds_optimal) {
204 			if (disks[c].blockSize != bs) {
205 				RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c);
206 				ret = EINVAL;
207 				goto fail;
208 			}
209 			if (disks[c].numBlocks != min_numblks) {
210 				RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n",
211 					     c, (int) min_numblks);
212 				disks[c].numBlocks = min_numblks;
213 			}
214 		}
215 	}
216 
217 	raidPtr->sectorsPerDisk = min_numblks;
218 	raidPtr->logBytesPerSector = ffs(bs) - 1;
219 	raidPtr->bytesPerSector = bs;
220 	raidPtr->sectorMask = bs - 1;
221 	return (0);
222 
223 fail:
224 
225 	rf_UnconfigureVnodes( raidPtr );
226 
227 	return (ret);
228 }
229 
230 
231 /****************************************************************************
232  * set up the data structures describing the spare disks in the array
233  * recall from the above comment that the spare disk descriptors are stored
234  * in row zero, which is specially expanded to hold them.
235  ****************************************************************************/
236 int
237 rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
238 		       RF_Config_t *cfgPtr)
239 {
240 	int     i, ret;
241 	unsigned int bs;
242 	RF_RaidDisk_t *disks;
243 	int     num_spares_done;
244 
245 	num_spares_done = 0;
246 
247 	/* The space for the spares should have already been allocated by
248 	 * ConfigureDisks() */
249 
250 	disks = &raidPtr->Disks[raidPtr->numCol];
251 	for (i = 0; i < raidPtr->numSpare; i++) {
252 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
253 				       &disks[i], raidPtr->numCol + i);
254 		if (ret)
255 			goto fail;
256 		if (disks[i].status != rf_ds_optimal) {
257 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
258 				     &cfgPtr->spare_names[i][0]);
259 		} else {
260 			disks[i].status = rf_ds_spare;	/* change status to
261 							 * spare */
262 			DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", i,
263 			    disks[i].devname,
264 			    disks[i].numBlocks, disks[i].blockSize,
265 			    (long int) disks[i].numBlocks *
266 				 disks[i].blockSize / 1024 / 1024);
267 		}
268 		num_spares_done++;
269 	}
270 
271 	/* check sizes and block sizes on spare disks */
272 	bs = 1 << raidPtr->logBytesPerSector;
273 	for (i = 0; i < raidPtr->numSpare; i++) {
274 		if (disks[i].blockSize != bs) {
275 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
276 			ret = EINVAL;
277 			goto fail;
278 		}
279 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
280 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
281 				     disks[i].devname, disks[i].blockSize,
282 				     raidPtr->sectorsPerDisk);
283 			ret = EINVAL;
284 			goto fail;
285 		} else
286 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
287 				RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
288 				    disks[i].devname,
289 				    raidPtr->sectorsPerDisk,
290 				    disks[i].numBlocks);
291 
292 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
293 			}
294 	}
295 
296 	return (0);
297 
298 fail:
299 
300 	/* Release the hold on the main components.  We've failed to allocate
301 	 * a spare, and since we're failing, we need to free things..
302 
303 	 XXX failing to allocate a spare is *not* that big of a deal...
304 	 We *can* survive without it, if need be, esp. if we get hot
305 	 adding working.
306 
307 	 If we don't fail out here, then we need a way to remove this spare...
308 	 that should be easier to do here than if we are "live"...
309 
310 	 */
311 
312 	rf_UnconfigureVnodes( raidPtr );
313 
314 	return (ret);
315 }
316 
317 static int
318 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
319 {
320 	int ret;
321 	size_t entries = raidPtr->numCol + RF_MAXSPARE;
322 
323 	/* We allocate RF_MAXSPARE on the first row so that we
324 	   have room to do hot-swapping of spares */
325 	raidPtr->Disks = RF_MallocAndAdd(
326 	    entries * sizeof(*raidPtr->Disks), raidPtr->cleanupList);
327 	if (raidPtr->Disks == NULL) {
328 		ret = ENOMEM;
329 		goto fail;
330 	}
331 
332 	/* get space for device specific stuff.. */
333 	raidPtr->raid_cinfo = RF_MallocAndAdd(
334 	    entries * sizeof(*raidPtr->raid_cinfo), raidPtr->cleanupList);
335 	if (raidPtr->raid_cinfo == NULL) {
336 		ret = ENOMEM;
337 		goto fail;
338 	}
339 
340 	return(0);
341 fail:
342 	rf_UnconfigureVnodes( raidPtr );
343 
344 	return(ret);
345 }
346 
347 
348 /* configure a single disk during auto-configuration at boot */
349 int
350 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
351 		      RF_AutoConfig_t *auto_config)
352 {
353 	RF_RaidDisk_t *disks;
354 	RF_RaidDisk_t *diskPtr;
355 	RF_RowCol_t c;
356 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
357 	int bs, ret;
358 	int numFailuresThisRow;
359 	RF_AutoConfig_t *ac;
360 	int parity_good;
361 	int mod_counter;
362 	int mod_counter_found;
363 
364 #if DEBUG
365 	printf("Starting autoconfiguration of RAID set...\n");
366 #endif
367 
368 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
369 	if (ret)
370 		goto fail;
371 
372 	disks = raidPtr->Disks;
373 
374 	/* assume the parity will be fine.. */
375 	parity_good = RF_RAID_CLEAN;
376 
377 	/* Check for mod_counters that are too low */
378 	mod_counter_found = 0;
379 	mod_counter = 0;
380 	ac = auto_config;
381 	while(ac!=NULL) {
382 		if (mod_counter_found==0) {
383 			mod_counter = ac->clabel->mod_counter;
384 			mod_counter_found = 1;
385 		} else {
386 			if (ac->clabel->mod_counter > mod_counter) {
387 				mod_counter = ac->clabel->mod_counter;
388 			}
389 		}
390 		ac->flag = 0; /* clear the general purpose flag */
391 		ac = ac->next;
392 	}
393 
394 	bs = 0;
395 
396 	numFailuresThisRow = 0;
397 	for (c = 0; c < raidPtr->numCol; c++) {
398 		diskPtr = &disks[c];
399 
400 		/* find this row/col in the autoconfig */
401 #if DEBUG
402 		printf("Looking for %d in autoconfig\n",c);
403 #endif
404 		ac = auto_config;
405 		while(ac!=NULL) {
406 			if (ac->clabel==NULL) {
407 				/* big-time bad news. */
408 				goto fail;
409 			}
410 			if ((ac->clabel->column == c) &&
411 			    (ac->clabel->mod_counter == mod_counter)) {
412 				/* it's this one... */
413 				/* flag it as 'used', so we don't
414 				   free it later. */
415 				ac->flag = 1;
416 #if DEBUG
417 				printf("Found: %s at %d\n",
418 				       ac->devname,c);
419 #endif
420 
421 				break;
422 			}
423 			ac=ac->next;
424 		}
425 
426 		if (ac==NULL) {
427 			/* we didn't find an exact match with a
428 			   correct mod_counter above... can we find
429 			   one with an incorrect mod_counter to use
430 			   instead?  (this one, if we find it, will be
431 			   marked as failed once the set configures)
432 			*/
433 
434 			ac = auto_config;
435 			while(ac!=NULL) {
436 				if (ac->clabel==NULL) {
437 					/* big-time bad news. */
438 					goto fail;
439 				}
440 				if (ac->clabel->column == c) {
441 					/* it's this one...
442 					   flag it as 'used', so we
443 					   don't free it later. */
444 					ac->flag = 1;
445 #if DEBUG
446 					printf("Found(low mod_counter): %s at %d\n",
447 					       ac->devname,c);
448 #endif
449 
450 					break;
451 				}
452 				ac=ac->next;
453 			}
454 		}
455 
456 
457 
458 		if (ac!=NULL) {
459 			/* Found it.  Configure it.. */
460 			diskPtr->blockSize = ac->clabel->blockSize;
461 			diskPtr->numBlocks =
462 			    rf_component_label_numblocks(ac->clabel);
463 			/* Note: rf_protectedSectors is already
464 			   factored into numBlocks here */
465 			raidPtr->raid_cinfo[c].ci_vp = ac->vp;
466 			raidPtr->raid_cinfo[c].ci_dev = ac->dev;
467 
468 			memcpy(raidget_component_label(raidPtr, c),
469 			    ac->clabel, sizeof(*ac->clabel));
470 			snprintf(diskPtr->devname, sizeof(diskPtr->devname),
471 			    "/dev/%s", ac->devname);
472 
473 			/* note the fact that this component was
474 			   autoconfigured.  You'll need this info
475 			   later.  Trust me :) */
476 			diskPtr->auto_configured = 1;
477 			diskPtr->dev = ac->dev;
478 
479 			/*
480 			 * we allow the user to specify that
481 			 * only a fraction of the disks should
482 			 * be used this is just for debug: it
483 			 * speeds up the parity scan
484 			 */
485 
486 			diskPtr->numBlocks = diskPtr->numBlocks *
487 				rf_sizePercentage / 100;
488 
489 			/* XXX these will get set multiple times,
490 			   but since we're autoconfiguring, they'd
491 			   better be always the same each time!
492 			   If not, this is the least of your worries */
493 
494 			bs = diskPtr->blockSize;
495 			min_numblks = diskPtr->numBlocks;
496 
497 			/* this gets done multiple times, but that's
498 			   fine -- the serial number will be the same
499 			   for all components, guaranteed */
500 			raidPtr->serial_number = ac->clabel->serial_number;
501 			/* check the last time the label was modified */
502 
503 			if (ac->clabel->mod_counter != mod_counter) {
504 				/* Even though we've filled in all of
505 				   the above, we don't trust this
506 				   component since its modification
507 				   counter is not in sync with the
508 				   rest, and we really consider it to
509 				   be failed.  */
510 				disks[c].status = rf_ds_failed;
511 				numFailuresThisRow++;
512 			} else {
513 				if (ac->clabel->clean != RF_RAID_CLEAN) {
514 					parity_good = RF_RAID_DIRTY;
515 				}
516 			}
517 		} else {
518 			/* Didn't find it at all!!  Component must
519 			   really be dead */
520 			disks[c].status = rf_ds_failed;
521 			snprintf(disks[c].devname, sizeof(disks[c].devname),
522 			    "component%d", c);
523 			numFailuresThisRow++;
524 		}
525 	}
526 	/* XXX fix for n-fault tolerant */
527 	/* XXX this should probably check to see how many failures
528 	   we can handle for this configuration! */
529 	if (numFailuresThisRow > 0) {
530 		raidPtr->status = rf_rs_degraded;
531 		raidPtr->numFailures = numFailuresThisRow;
532 	}
533 
534 	/* close the device for the ones that didn't get used */
535 
536 	ac = auto_config;
537 	while(ac!=NULL) {
538 		if (ac->flag == 0) {
539 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
540 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
541 			vput(ac->vp);
542 			ac->vp = NULL;
543 #if DEBUG
544 			printf("Released %s from auto-config set.\n",
545 			       ac->devname);
546 #endif
547 		}
548 		ac = ac->next;
549 	}
550 
551 	raidPtr->mod_counter = mod_counter;
552 
553 	/* note the state of the parity, if any */
554 	raidPtr->parity_good = parity_good;
555 	raidPtr->sectorsPerDisk = min_numblks;
556 	raidPtr->logBytesPerSector = ffs(bs) - 1;
557 	raidPtr->bytesPerSector = bs;
558 	raidPtr->sectorMask = bs - 1;
559 	return (0);
560 
561 fail:
562 
563 	rf_UnconfigureVnodes( raidPtr );
564 
565 	return (ret);
566 
567 }
568 
569 /* configure a single disk in the array */
570 int
571 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *bf, RF_RaidDisk_t *diskPtr,
572 		 RF_RowCol_t col)
573 {
574 	char   *p;
575 	struct pathbuf *pb;
576 	struct vnode *vp;
577 	int     error;
578 
579 	p = rf_find_non_white(bf);
580 	if (p[strlen(p) - 1] == '\n') {
581 		/* strip off the newline */
582 		p[strlen(p) - 1] = '\0';
583 	}
584 	(void) strcpy(diskPtr->devname, p);
585 
586 	/* Let's start by claiming the component is fine and well... */
587 	diskPtr->status = rf_ds_optimal;
588 
589 	raidPtr->raid_cinfo[col].ci_vp = NULL;
590 	raidPtr->raid_cinfo[col].ci_dev = 0;
591 
592 	if (!strcmp("absent", diskPtr->devname)) {
593 		printf("Ignoring missing component at column %d\n", col);
594 		snprintf(diskPtr->devname, sizeof(diskPtr->devname),
595 		    "component%d", col);
596 		diskPtr->status = rf_ds_failed;
597 		return (0);
598 	}
599 
600 	pb = pathbuf_create(diskPtr->devname);
601 	if (pb == NULL) {
602 		printf("pathbuf_create for device: %s failed!\n",
603 		       diskPtr->devname);
604 		return ENOMEM;
605 	}
606 	error = vn_bdev_openpath(pb, &vp, curlwp);
607 	pathbuf_destroy(pb);
608 	if (error) {
609 		printf("open device: '%s' failed: %d\n", diskPtr->devname, error);
610 		if (error == ENXIO) {
611 			/* the component isn't there... must be dead :-( */
612 			diskPtr->status = rf_ds_failed;
613 			return 0;
614 		} else {
615 			return (error);
616 		}
617 	}
618 
619 	if ((error = rf_getdisksize(vp, diskPtr)) != 0)
620 		return (error);
621 
622 	/*
623 	 * If this raidPtr's bytesPerSector is zero, fill it in with this
624 	 * components blockSize.  This will give us something to work with
625 	 * initially, and if it is wrong, we'll get errors later.
626 	 */
627 	if (raidPtr->bytesPerSector == 0)
628 		raidPtr->bytesPerSector = diskPtr->blockSize;
629 
630 	if (diskPtr->status == rf_ds_optimal) {
631 		raidPtr->raid_cinfo[col].ci_vp = vp;
632 		raidPtr->raid_cinfo[col].ci_dev = vp->v_rdev;
633 
634 		/* This component was not automatically configured */
635 		diskPtr->auto_configured = 0;
636 		diskPtr->dev = vp->v_rdev;
637 
638 		/* we allow the user to specify that only a fraction of the
639 		 * disks should be used this is just for debug:  it speeds up
640 		 * the parity scan */
641 		diskPtr->numBlocks = diskPtr->numBlocks *
642 			rf_sizePercentage / 100;
643 	}
644 	return (0);
645 }
646 
647 static void
648 rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name,
649 		      RF_ComponentLabel_t *ci_label)
650 {
651 
652 	printf("raid%d: Component %s being configured at col: %d\n",
653 	       raidPtr->raidid, dev_name, column );
654 	printf("         Column: %d Num Columns: %d\n",
655 	       ci_label->column,
656 	       ci_label->num_columns);
657 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
658 	       ci_label->version, ci_label->serial_number,
659 	       ci_label->mod_counter);
660 	printf("         Clean: %s Status: %d\n",
661 	       ci_label->clean ? "Yes" : "No", ci_label->status );
662 }
663 
664 static int rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column,
665 				 char *dev_name, RF_ComponentLabel_t *ci_label,
666 				 int serial_number, int mod_counter)
667 {
668 	int fatal_error = 0;
669 
670 	if (serial_number != ci_label->serial_number) {
671 		printf("%s has a different serial number: %d %d\n",
672 		       dev_name, serial_number, ci_label->serial_number);
673 		fatal_error = 1;
674 	}
675 	if (mod_counter != ci_label->mod_counter) {
676 		printf("%s has a different modification count: %d %d\n",
677 		       dev_name, mod_counter, ci_label->mod_counter);
678 	}
679 
680 	if (row != ci_label->row) {
681 		printf("Row out of alignment for: %s\n", dev_name);
682 		fatal_error = 1;
683 	}
684 	if (column != ci_label->column) {
685 		printf("Column out of alignment for: %s\n", dev_name);
686 		fatal_error = 1;
687 	}
688 	if (raidPtr->numCol != ci_label->num_columns) {
689 		printf("Number of columns do not match for: %s\n", dev_name);
690 		fatal_error = 1;
691 	}
692 	if (ci_label->clean == 0) {
693 		/* it's not clean, but that's not fatal */
694 		printf("%s is not clean!\n", dev_name);
695 	}
696 	return(fatal_error);
697 }
698 
699 
700 static void
701 rf_handle_hosed(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, int hosed_column,
702     int again)
703 {
704 	printf("Hosed component: %s\n", &cfgPtr->devnames[0][hosed_column][0]);
705 	if (cfgPtr->force)
706 		return;
707 
708 	/* we'll fail this component, as if there are
709 	   other major errors, we aren't forcing things
710 	   and we'll abort the config anyways */
711 	if (again && raidPtr->Disks[hosed_column].status == rf_ds_failed)
712 		return;
713 
714 	raidPtr->Disks[hosed_column].status = rf_ds_failed;
715 	raidPtr->numFailures++;
716 	raidPtr->status = rf_rs_degraded;
717 }
718 
719 /*
720 
721    rf_CheckLabels() - check all the component labels for consistency.
722    Return an error if there is anything major amiss.
723 
724  */
725 
726 int
727 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
728 {
729 	int c;
730 	char *dev_name;
731 	RF_ComponentLabel_t *ci_label;
732 	int serial_number = 0;
733 	int mod_number = 0;
734 	int fatal_error = 0;
735 	int mod_values[4];
736 	int mod_count[4];
737 	int ser_values[4];
738 	int ser_count[4];
739 	int num_ser;
740 	int num_mod;
741 	int i;
742 	int found;
743 	int hosed_column;
744 	int too_fatal;
745 	int parity_good;
746 
747 	hosed_column = -1;
748 	too_fatal = 0;
749 
750 	/*
751 	   We're going to try to be a little intelligent here.  If one
752 	   component's label is bogus, and we can identify that it's the
753 	   *only* one that's gone, we'll mark it as "failed" and allow
754 	   the configuration to proceed.  This will be the *only* case
755 	   that we'll proceed if there would be (otherwise) fatal errors.
756 
757 	   Basically we simply keep a count of how many components had
758 	   what serial number.  If all but one agree, we simply mark
759 	   the disagreeing component as being failed, and allow
760 	   things to come up "normally".
761 
762 	   We do this first for serial numbers, and then for "mod_counter".
763 
764 	 */
765 
766 	num_ser = 0;
767 	num_mod = 0;
768 
769 	ser_values[0] = ser_values[1] = ser_values[2] = ser_values[3] = 0;
770 	ser_count[0] = ser_count[1] = ser_count[2] = ser_count[3] = 0;
771 	mod_values[0] = mod_values[1] = mod_values[2] = mod_values[3] = 0;
772 	mod_count[0] = mod_count[1] = mod_count[2] = mod_count[3] = 0;
773 
774 	for (c = 0; c < raidPtr->numCol; c++) {
775 		if (raidPtr->Disks[c].status != rf_ds_optimal)
776 			continue;
777 		ci_label = raidget_component_label(raidPtr, c);
778 		found=0;
779 		for(i=0;i<num_ser;i++) {
780 			if (ser_values[i] == ci_label->serial_number) {
781 				ser_count[i]++;
782 				found=1;
783 				break;
784 			}
785 		}
786 		if (!found) {
787 			ser_values[num_ser] = ci_label->serial_number;
788 			ser_count[num_ser] = 1;
789 			num_ser++;
790 			if (num_ser>2) {
791 				fatal_error = 1;
792 				break;
793 			}
794 		}
795 		found=0;
796 		for(i=0;i<num_mod;i++) {
797 			if (mod_values[i] == ci_label->mod_counter) {
798 				mod_count[i]++;
799 				found=1;
800 				break;
801 			}
802 		}
803 		if (!found) {
804 			mod_values[num_mod] = ci_label->mod_counter;
805 			mod_count[num_mod] = 1;
806 			num_mod++;
807 			if (num_mod>2) {
808 				fatal_error = 1;
809 				break;
810 			}
811 		}
812 	}
813 #if DEBUG
814 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
815 	for(i=0;i<num_ser;i++) {
816 		printf("%d %d\n", ser_values[i], ser_count[i]);
817 	}
818 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
819 	for(i=0;i<num_mod;i++) {
820 		printf("%d %d\n", mod_values[i], mod_count[i]);
821 	}
822 #endif
823 	serial_number = ser_values[0];
824 	if (num_ser == 2) {
825 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
826 			/* Locate the maverick component */
827 			if (ser_count[1] > ser_count[0]) {
828 				serial_number = ser_values[1];
829 			}
830 
831 			for (c = 0; c < raidPtr->numCol; c++) {
832 				if (raidPtr->Disks[c].status != rf_ds_optimal)
833 					continue;
834 				ci_label = raidget_component_label(raidPtr, c);
835 				if (serial_number != ci_label->serial_number) {
836 					hosed_column = c;
837 					break;
838 				}
839 			}
840 			if (hosed_column != -1)
841 				rf_handle_hosed(raidPtr, cfgPtr, hosed_column,
842 				    0);
843 		} else {
844 			too_fatal = 1;
845 		}
846 		if (cfgPtr->parityConfig == '0') {
847 			/* We've identified two different serial numbers.
848 			   RAID 0 can't cope with that, so we'll punt */
849 			too_fatal = 1;
850 		}
851 
852 	}
853 
854 	/* record the serial number for later.  If we bail later, setting
855 	   this doesn't matter, otherwise we've got the best guess at the
856 	   correct serial number */
857 	raidPtr->serial_number = serial_number;
858 
859 	mod_number = mod_values[0];
860 	if (num_mod == 2) {
861 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
862 			/* Locate the maverick component */
863 			if (mod_count[1] > mod_count[0]) {
864 				mod_number = mod_values[1];
865 			} else if (mod_count[1] < mod_count[0]) {
866 				mod_number = mod_values[0];
867 			} else {
868 				/* counts of different modification values
869 				   are the same.   Assume greater value is
870 				   the correct one, all other things
871 				   considered */
872 				if (mod_values[0] > mod_values[1]) {
873 					mod_number = mod_values[0];
874 				} else {
875 					mod_number = mod_values[1];
876 				}
877 
878 			}
879 
880 			for (c = 0; c < raidPtr->numCol; c++) {
881 				if (raidPtr->Disks[c].status != rf_ds_optimal)
882 					continue;
883 
884 				ci_label = raidget_component_label(raidPtr, c);
885 				if (mod_number != ci_label->mod_counter) {
886 					if (hosed_column == c) {
887 						/* same one.  Can
888 						   deal with it.  */
889 					} else {
890 						hosed_column = c;
891 						if (num_ser != 1) {
892 							too_fatal = 1;
893 							break;
894 						}
895 					}
896 				}
897 			}
898 			if (hosed_column != -1)
899 				rf_handle_hosed(raidPtr, cfgPtr, hosed_column,
900 				    1);
901 		} else {
902 			too_fatal = 1;
903 		}
904 		if (cfgPtr->parityConfig == '0') {
905 			/* We've identified two different mod counters.
906 			   RAID 0 can't cope with that, so we'll punt */
907 			too_fatal = 1;
908 		}
909 	}
910 
911 	raidPtr->mod_counter = mod_number;
912 
913 	if (too_fatal) {
914 		/* we've had both a serial number mismatch, and a mod_counter
915 		   mismatch -- and they involved two different components!!
916 		   Bail -- make things fail so that the user must force
917 		   the issue... */
918 		hosed_column = -1;
919 		fatal_error = 1;
920 	}
921 
922 	if (num_ser > 2) {
923 		printf("raid%d: Too many different serial numbers!\n",
924 		       raidPtr->raidid);
925 		fatal_error = 1;
926 	}
927 
928 	if (num_mod > 2) {
929 		printf("raid%d: Too many different mod counters!\n",
930 		       raidPtr->raidid);
931 		fatal_error = 1;
932 	}
933 
934         for (c = 0; c < raidPtr->numCol; c++) {
935 		if (raidPtr->Disks[c].status != rf_ds_optimal) {
936 			hosed_column = c;
937 			break;
938 		}
939 	}
940 
941 	/* we start by assuming the parity will be good, and flee from
942 	   that notion at the slightest sign of trouble */
943 
944 	parity_good = RF_RAID_CLEAN;
945 
946 	for (c = 0; c < raidPtr->numCol; c++) {
947 		dev_name = &cfgPtr->devnames[0][c][0];
948 		ci_label = raidget_component_label(raidPtr, c);
949 
950 		if (c == hosed_column) {
951 			printf("raid%d: Ignoring %s\n",
952 			       raidPtr->raidid, dev_name);
953 		} else {
954 			rf_print_label_status( raidPtr, c, dev_name, ci_label);
955 			if (rf_check_label_vitals( raidPtr, 0, c,
956 						   dev_name, ci_label,
957 						   serial_number,
958 						   mod_number )) {
959 				fatal_error = 1;
960 			}
961 			if (ci_label->clean != RF_RAID_CLEAN) {
962 				parity_good = RF_RAID_DIRTY;
963 			}
964 		}
965 	}
966 
967 	if (fatal_error) {
968 		parity_good = RF_RAID_DIRTY;
969 	}
970 
971 	/* we note the state of the parity */
972 	raidPtr->parity_good = parity_good;
973 
974 	return(fatal_error);
975 }
976 
977 int
978 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
979 {
980 	RF_RaidDisk_t *disks;
981 	RF_DiskQueue_t *spareQueues;
982 	int ret;
983 	unsigned int bs;
984 	int spare_number;
985 
986 	ret=0;
987 
988 	if (raidPtr->numSpare >= RF_MAXSPARE) {
989 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
990 		return(EINVAL);
991 	}
992 
993 	rf_lock_mutex2(raidPtr->mutex);
994 	while (raidPtr->adding_hot_spare == 1) {
995 		rf_wait_cond2(raidPtr->adding_hot_spare_cv, raidPtr->mutex);
996 	}
997 	raidPtr->adding_hot_spare = 1;
998 	rf_unlock_mutex2(raidPtr->mutex);
999 
1000 	/* the beginning of the spares... */
1001 	disks = &raidPtr->Disks[raidPtr->numCol];
1002 
1003 	spare_number = raidPtr->numSpare;
1004 
1005 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
1006 			       &disks[spare_number],
1007 			       raidPtr->numCol + spare_number);
1008 
1009 	if (ret)
1010 		goto fail;
1011 	if (disks[spare_number].status != rf_ds_optimal) {
1012 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
1013 			     sparePtr->component_name);
1014 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
1015 		ret=EINVAL;
1016 		goto fail;
1017 	} else {
1018 		disks[spare_number].status = rf_ds_spare;
1019 		DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
1020 			 spare_number,
1021 			 disks[spare_number].devname,
1022 			 disks[spare_number].numBlocks,
1023 			 disks[spare_number].blockSize,
1024 			 (long int) disks[spare_number].numBlocks *
1025 			 disks[spare_number].blockSize / 1024 / 1024);
1026 	}
1027 
1028 
1029 	/* check sizes and block sizes on the spare disk */
1030 	bs = 1 << raidPtr->logBytesPerSector;
1031 	if (disks[spare_number].blockSize != bs) {
1032 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
1033 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
1034 		ret = EINVAL;
1035 		goto fail;
1036 	}
1037 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1038 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
1039 			     disks[spare_number].devname,
1040 			     disks[spare_number].blockSize,
1041 			     raidPtr->sectorsPerDisk);
1042 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
1043 		ret = EINVAL;
1044 		goto fail;
1045 	} else {
1046 		if (disks[spare_number].numBlocks >
1047 		    raidPtr->sectorsPerDisk) {
1048 			RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
1049 			    disks[spare_number].devname,
1050 			    raidPtr->sectorsPerDisk,
1051 			    disks[spare_number].numBlocks);
1052 
1053 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1054 		}
1055 	}
1056 
1057 	spareQueues = &raidPtr->Queues[raidPtr->numCol];
1058 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1059 				 raidPtr->numCol + spare_number,
1060 				 raidPtr->qType,
1061 				 raidPtr->sectorsPerDisk,
1062 				 raidPtr->Disks[raidPtr->numCol +
1063 						  spare_number].dev,
1064 				 raidPtr->maxOutstanding,
1065 				 &raidPtr->shutdownList,
1066 				 raidPtr->cleanupList);
1067 
1068 	rf_lock_mutex2(raidPtr->mutex);
1069 	raidPtr->numSpare++;
1070 	rf_unlock_mutex2(raidPtr->mutex);
1071 
1072 fail:
1073 	rf_lock_mutex2(raidPtr->mutex);
1074 	raidPtr->adding_hot_spare = 0;
1075 	rf_signal_cond2(raidPtr->adding_hot_spare_cv);
1076 	rf_unlock_mutex2(raidPtr->mutex);
1077 
1078 	return(ret);
1079 }
1080 
1081 int
1082 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
1083 {
1084 #if 0
1085 	int spare_number;
1086 #endif
1087 
1088 	if (raidPtr->numSpare==0) {
1089 		printf("No spares to remove!\n");
1090 		return(EINVAL);
1091 	}
1092 
1093 	return(EINVAL); /* XXX not implemented yet */
1094 #if 0
1095 	spare_number = sparePtr->column;
1096 
1097 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1098 		return(EINVAL);
1099 	}
1100 
1101 	/* verify that this spare isn't in use... */
1102 
1103 
1104 
1105 
1106 	/* it's gone.. */
1107 
1108 	raidPtr->numSpare--;
1109 
1110 	return(0);
1111 #endif
1112 }
1113 
1114 
1115 int
1116 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
1117 {
1118 #if 0
1119 	RF_RaidDisk_t *disks;
1120 #endif
1121 
1122 	if ((component->column < 0) ||
1123 	    (component->column >= raidPtr->numCol)) {
1124 		return(EINVAL);
1125 	}
1126 
1127 #if 0
1128 	disks = &raidPtr->Disks[component->column];
1129 #endif
1130 
1131 	/* 1. This component must be marked as 'failed' */
1132 
1133 	return(EINVAL); /* Not implemented yet. */
1134 }
1135 
1136 int
1137 rf_incorporate_hot_spare(RF_Raid_t *raidPtr,
1138     RF_SingleComponent_t *component)
1139 {
1140 
1141 	/* Issues here include how to 'move' this in if there is IO
1142 	   taking place (e.g. component queues and such) */
1143 
1144 	return(EINVAL); /* Not implemented yet. */
1145 }
1146