xref: /netbsd-src/sys/dev/raidframe/rf_disks.c (revision 76c7fc5f6b13ed0b1508e6b313e88e59977ed78e)
1 /*	$NetBSD: rf_disks.c,v 1.91 2019/02/09 03:34:00 christos Exp $	*/
2 /*-
3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to The NetBSD Foundation
7  * by Greg Oster
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 /*
32  * Copyright (c) 1995 Carnegie-Mellon University.
33  * All rights reserved.
34  *
35  * Author: Mark Holland
36  *
37  * Permission to use, copy, modify and distribute this software and
38  * its documentation is hereby granted, provided that both the copyright
39  * notice and this permission notice appear in all copies of the
40  * software, derivative works or modified versions, and any portions
41  * thereof, and that both notices appear in supporting documentation.
42  *
43  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
44  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
45  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46  *
47  * Carnegie Mellon requests users of this software to return to
48  *
49  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
50  *  School of Computer Science
51  *  Carnegie Mellon University
52  *  Pittsburgh PA 15213-3890
53  *
54  * any improvements or extensions that they make and grant Carnegie the
55  * rights to redistribute these changes.
56  */
57 
58 /***************************************************************
59  * rf_disks.c -- code to perform operations on the actual disks
60  ***************************************************************/
61 
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.91 2019/02/09 03:34:00 christos Exp $");
64 
65 #include <dev/raidframe/raidframevar.h>
66 
67 #include "rf_raid.h"
68 #include "rf_alloclist.h"
69 #include "rf_utils.h"
70 #include "rf_general.h"
71 #include "rf_options.h"
72 #include "rf_kintf.h"
73 #include "rf_netbsd.h"
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/proc.h>
78 #include <sys/ioctl.h>
79 #include <sys/fcntl.h>
80 #include <sys/vnode.h>
81 #include <sys/namei.h> /* for pathbuf */
82 #include <sys/kauth.h>
83 #include <miscfs/specfs/specdev.h> /* for v_rdev */
84 
85 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
86 static void rf_print_label_status( RF_Raid_t *, int, char *,
87 				  RF_ComponentLabel_t *);
88 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
89 				  RF_ComponentLabel_t *, int, int );
90 
/*
 * Debug printf helpers, active only when rf_diskDebug is non-zero.
 *
 * The bodies are wrapped in do { } while (0) so that each macro expands to
 * exactly one statement.  A bare "if (...) printf(...)" expansion would
 * silently capture an "else" that follows the macro invocation.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
93 
94 /**************************************************************************
95  *
96  * initialize the disks comprising the array
97  *
98  * We want the spare disks to have regular row,col numbers so that we can
99  * easily substitue a spare for a failed disk.  But, the driver code assumes
100  * throughout that the array contains numRow by numCol _non-spare_ disks, so
101  * it's not clear how to fit in the spares.  This is an unfortunate holdover
102  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
103  * rest, and put all the spares in it.  This probably needs to get changed
104  * eventually.
105  *
106  **************************************************************************/
107 
108 int
109 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
110 		  RF_Config_t *cfgPtr)
111 {
112 	RF_RaidDisk_t *disks;
113 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
114 	RF_RowCol_t c;
115 	int bs, ret;
116 	unsigned i, count, foundone = 0, numFailuresThisRow;
117 	int force;
118 
119 	force = cfgPtr->force;
120 
121 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
122 	if (ret)
123 		goto fail;
124 
125 	disks = raidPtr->Disks;
126 
127 	numFailuresThisRow = 0;
128 	for (c = 0; c < raidPtr->numCol; c++) {
129 		ret = rf_ConfigureDisk(raidPtr,
130 				       &cfgPtr->devnames[0][c][0],
131 				       &disks[c], c);
132 
133 		if (ret)
134 			goto fail;
135 
136 		if (disks[c].status == rf_ds_optimal) {
137 			ret = raidfetch_component_label(raidPtr, c);
138 			if (ret)
139 				goto fail;
140 
141 			/* mark it as failed if the label looks bogus... */
142 			if (!rf_reasonable_label(&raidPtr->raid_cinfo[c].ci_label,0) && !force) {
143 				disks[c].status = rf_ds_failed;
144 			}
145 		}
146 
147 		if (disks[c].status != rf_ds_optimal) {
148 			numFailuresThisRow++;
149 		} else {
150 			if (disks[c].numBlocks < min_numblks)
151 				min_numblks = disks[c].numBlocks;
152 			DPRINTF6("Disk at col %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
153 				 c, disks[c].devname,
154 				 disks[c].numBlocks,
155 				 disks[c].blockSize,
156 				 (long int) disks[c].numBlocks *
157 				 disks[c].blockSize / 1024 / 1024);
158 		}
159 	}
160 	/* XXX fix for n-fault tolerant */
161 	/* XXX this should probably check to see how many failures
162 	   we can handle for this configuration! */
163 	if (numFailuresThisRow > 0)
164 		raidPtr->status = rf_rs_degraded;
165 
166 	/* all disks must be the same size & have the same block size, bs must
167 	 * be a power of 2 */
168 	bs = 0;
169 	foundone = 0;
170 	for (c = 0; c < raidPtr->numCol; c++) {
171 		if (disks[c].status == rf_ds_optimal) {
172 			bs = disks[c].blockSize;
173 			foundone = 1;
174 			break;
175 		}
176 	}
177 	if (!foundone) {
178 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
179 		ret = EINVAL;
180 		goto fail;
181 	}
182 	for (count = 0, i = 1; i; i <<= 1)
183 		if (bs & i)
184 			count++;
185 	if (count != 1) {
186 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
187 		ret = EINVAL;
188 		goto fail;
189 	}
190 
191 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
192 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
193 		if (force != 0) {
194 			printf("raid%d: Fatal errors being ignored.\n",
195 			       raidPtr->raidid);
196 		} else {
197 			ret = EINVAL;
198 			goto fail;
199 		}
200 	}
201 
202 	for (c = 0; c < raidPtr->numCol; c++) {
203 		if (disks[c].status == rf_ds_optimal) {
204 			if (disks[c].blockSize != bs) {
205 				RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c);
206 				ret = EINVAL;
207 				goto fail;
208 			}
209 			if (disks[c].numBlocks != min_numblks) {
210 				RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n",
211 					     c, (int) min_numblks);
212 				disks[c].numBlocks = min_numblks;
213 			}
214 		}
215 	}
216 
217 	raidPtr->sectorsPerDisk = min_numblks;
218 	raidPtr->logBytesPerSector = ffs(bs) - 1;
219 	raidPtr->bytesPerSector = bs;
220 	raidPtr->sectorMask = bs - 1;
221 	return (0);
222 
223 fail:
224 
225 	rf_UnconfigureVnodes( raidPtr );
226 
227 	return (ret);
228 }
229 
230 
231 /****************************************************************************
232  * set up the data structures describing the spare disks in the array
233  * recall from the above comment that the spare disk descriptors are stored
234  * in row zero, which is specially expanded to hold them.
235  ****************************************************************************/
236 int
237 rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
238 		       RF_Config_t *cfgPtr)
239 {
240 	int     i, ret;
241 	unsigned int bs;
242 	RF_RaidDisk_t *disks;
243 	int     num_spares_done;
244 
245 	num_spares_done = 0;
246 
247 	/* The space for the spares should have already been allocated by
248 	 * ConfigureDisks() */
249 
250 	disks = &raidPtr->Disks[raidPtr->numCol];
251 	for (i = 0; i < raidPtr->numSpare; i++) {
252 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
253 				       &disks[i], raidPtr->numCol + i);
254 		if (ret)
255 			goto fail;
256 		if (disks[i].status != rf_ds_optimal) {
257 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
258 				     &cfgPtr->spare_names[i][0]);
259 		} else {
260 			disks[i].status = rf_ds_spare;	/* change status to
261 							 * spare */
262 			DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", i,
263 			    disks[i].devname,
264 			    disks[i].numBlocks, disks[i].blockSize,
265 			    (long int) disks[i].numBlocks *
266 				 disks[i].blockSize / 1024 / 1024);
267 		}
268 		num_spares_done++;
269 	}
270 
271 	/* check sizes and block sizes on spare disks */
272 	bs = 1 << raidPtr->logBytesPerSector;
273 	for (i = 0; i < raidPtr->numSpare; i++) {
274 		if (disks[i].blockSize != bs) {
275 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
276 			ret = EINVAL;
277 			goto fail;
278 		}
279 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
280 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
281 				     disks[i].devname, disks[i].blockSize,
282 				     raidPtr->sectorsPerDisk);
283 			ret = EINVAL;
284 			goto fail;
285 		} else
286 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
287 				RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
288 				    disks[i].devname,
289 				    raidPtr->sectorsPerDisk,
290 				    disks[i].numBlocks);
291 
292 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
293 			}
294 	}
295 
296 	return (0);
297 
298 fail:
299 
300 	/* Release the hold on the main components.  We've failed to allocate
301 	 * a spare, and since we're failing, we need to free things..
302 
303 	 XXX failing to allocate a spare is *not* that big of a deal...
304 	 We *can* survive without it, if need be, esp. if we get hot
305 	 adding working.
306 
307 	 If we don't fail out here, then we need a way to remove this spare...
308 	 that should be easier to do here than if we are "live"...
309 
310 	 */
311 
312 	rf_UnconfigureVnodes( raidPtr );
313 
314 	return (ret);
315 }
316 
317 static int
318 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
319 {
320 	int ret;
321 
322 	/* We allocate RF_MAXSPARE on the first row so that we
323 	   have room to do hot-swapping of spares */
324 	raidPtr->Disks = RF_MallocAndAdd((raidPtr->numCol + RF_MAXSPARE) *
325 	    sizeof(*raidPtr->Disks), raidPtr->cleanupList);
326 	if (raidPtr->Disks == NULL) {
327 		ret = ENOMEM;
328 		goto fail;
329 	}
330 
331 	/* get space for device specific stuff.. */
332 	raidPtr->raid_cinfo = RF_MallocAndAdd(
333 	    (raidPtr->numCol + RF_MAXSPARE) * sizeof(*raidPtr->raid_cinfo),
334 	    raidPtr->cleanupList);
335 
336 	if (raidPtr->raid_cinfo == NULL) {
337 		ret = ENOMEM;
338 		goto fail;
339 	}
340 
341 	return(0);
342 fail:
343 	rf_UnconfigureVnodes( raidPtr );
344 
345 	return(ret);
346 }
347 
348 
349 /* configure a single disk during auto-configuration at boot */
350 int
351 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
352 		      RF_AutoConfig_t *auto_config)
353 {
354 	RF_RaidDisk_t *disks;
355 	RF_RaidDisk_t *diskPtr;
356 	RF_RowCol_t c;
357 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
358 	int bs, ret;
359 	int numFailuresThisRow;
360 	RF_AutoConfig_t *ac;
361 	int parity_good;
362 	int mod_counter;
363 	int mod_counter_found;
364 
365 #if DEBUG
366 	printf("Starting autoconfiguration of RAID set...\n");
367 #endif
368 
369 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
370 	if (ret)
371 		goto fail;
372 
373 	disks = raidPtr->Disks;
374 
375 	/* assume the parity will be fine.. */
376 	parity_good = RF_RAID_CLEAN;
377 
378 	/* Check for mod_counters that are too low */
379 	mod_counter_found = 0;
380 	mod_counter = 0;
381 	ac = auto_config;
382 	while(ac!=NULL) {
383 		if (mod_counter_found==0) {
384 			mod_counter = ac->clabel->mod_counter;
385 			mod_counter_found = 1;
386 		} else {
387 			if (ac->clabel->mod_counter > mod_counter) {
388 				mod_counter = ac->clabel->mod_counter;
389 			}
390 		}
391 		ac->flag = 0; /* clear the general purpose flag */
392 		ac = ac->next;
393 	}
394 
395 	bs = 0;
396 
397 	numFailuresThisRow = 0;
398 	for (c = 0; c < raidPtr->numCol; c++) {
399 		diskPtr = &disks[c];
400 
401 		/* find this row/col in the autoconfig */
402 #if DEBUG
403 		printf("Looking for %d in autoconfig\n",c);
404 #endif
405 		ac = auto_config;
406 		while(ac!=NULL) {
407 			if (ac->clabel==NULL) {
408 				/* big-time bad news. */
409 				goto fail;
410 			}
411 			if ((ac->clabel->column == c) &&
412 			    (ac->clabel->mod_counter == mod_counter)) {
413 				/* it's this one... */
414 				/* flag it as 'used', so we don't
415 				   free it later. */
416 				ac->flag = 1;
417 #if DEBUG
418 				printf("Found: %s at %d\n",
419 				       ac->devname,c);
420 #endif
421 
422 				break;
423 			}
424 			ac=ac->next;
425 		}
426 
427 		if (ac==NULL) {
428 			/* we didn't find an exact match with a
429 			   correct mod_counter above... can we find
430 			   one with an incorrect mod_counter to use
431 			   instead?  (this one, if we find it, will be
432 			   marked as failed once the set configures)
433 			*/
434 
435 			ac = auto_config;
436 			while(ac!=NULL) {
437 				if (ac->clabel==NULL) {
438 					/* big-time bad news. */
439 					goto fail;
440 				}
441 				if (ac->clabel->column == c) {
442 					/* it's this one...
443 					   flag it as 'used', so we
444 					   don't free it later. */
445 					ac->flag = 1;
446 #if DEBUG
447 					printf("Found(low mod_counter): %s at %d\n",
448 					       ac->devname,c);
449 #endif
450 
451 					break;
452 				}
453 				ac=ac->next;
454 			}
455 		}
456 
457 
458 
459 		if (ac!=NULL) {
460 			/* Found it.  Configure it.. */
461 			diskPtr->blockSize = ac->clabel->blockSize;
462 			diskPtr->numBlocks =
463 			    rf_component_label_numblocks(ac->clabel);
464 			/* Note: rf_protectedSectors is already
465 			   factored into numBlocks here */
466 			raidPtr->raid_cinfo[c].ci_vp = ac->vp;
467 			raidPtr->raid_cinfo[c].ci_dev = ac->dev;
468 
469 			memcpy(raidget_component_label(raidPtr, c),
470 			    ac->clabel, sizeof(*ac->clabel));
471 			snprintf(diskPtr->devname, sizeof(diskPtr->devname),
472 			    "/dev/%s", ac->devname);
473 
474 			/* note the fact that this component was
475 			   autoconfigured.  You'll need this info
476 			   later.  Trust me :) */
477 			diskPtr->auto_configured = 1;
478 			diskPtr->dev = ac->dev;
479 
480 			/*
481 			 * we allow the user to specify that
482 			 * only a fraction of the disks should
483 			 * be used this is just for debug: it
484 			 * speeds up the parity scan
485 			 */
486 
487 			diskPtr->numBlocks = diskPtr->numBlocks *
488 				rf_sizePercentage / 100;
489 
490 			/* XXX these will get set multiple times,
491 			   but since we're autoconfiguring, they'd
492 			   better be always the same each time!
493 			   If not, this is the least of your worries */
494 
495 			bs = diskPtr->blockSize;
496 			min_numblks = diskPtr->numBlocks;
497 
498 			/* this gets done multiple times, but that's
499 			   fine -- the serial number will be the same
500 			   for all components, guaranteed */
501 			raidPtr->serial_number = ac->clabel->serial_number;
502 			/* check the last time the label was modified */
503 
504 			if (ac->clabel->mod_counter != mod_counter) {
505 				/* Even though we've filled in all of
506 				   the above, we don't trust this
507 				   component since its modification
508 				   counter is not in sync with the
509 				   rest, and we really consider it to
510 				   be failed.  */
511 				disks[c].status = rf_ds_failed;
512 				numFailuresThisRow++;
513 			} else {
514 				if (ac->clabel->clean != RF_RAID_CLEAN) {
515 					parity_good = RF_RAID_DIRTY;
516 				}
517 			}
518 		} else {
519 			/* Didn't find it at all!!  Component must
520 			   really be dead */
521 			disks[c].status = rf_ds_failed;
522 			snprintf(disks[c].devname, sizeof(disks[c].devname),
523 			    "component%d", c);
524 			numFailuresThisRow++;
525 		}
526 	}
527 	/* XXX fix for n-fault tolerant */
528 	/* XXX this should probably check to see how many failures
529 	   we can handle for this configuration! */
530 	if (numFailuresThisRow > 0) {
531 		raidPtr->status = rf_rs_degraded;
532 		raidPtr->numFailures = numFailuresThisRow;
533 	}
534 
535 	/* close the device for the ones that didn't get used */
536 
537 	ac = auto_config;
538 	while(ac!=NULL) {
539 		if (ac->flag == 0) {
540 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
541 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
542 			vput(ac->vp);
543 			ac->vp = NULL;
544 #if DEBUG
545 			printf("Released %s from auto-config set.\n",
546 			       ac->devname);
547 #endif
548 		}
549 		ac = ac->next;
550 	}
551 
552 	raidPtr->mod_counter = mod_counter;
553 
554 	/* note the state of the parity, if any */
555 	raidPtr->parity_good = parity_good;
556 	raidPtr->sectorsPerDisk = min_numblks;
557 	raidPtr->logBytesPerSector = ffs(bs) - 1;
558 	raidPtr->bytesPerSector = bs;
559 	raidPtr->sectorMask = bs - 1;
560 	return (0);
561 
562 fail:
563 
564 	rf_UnconfigureVnodes( raidPtr );
565 
566 	return (ret);
567 
568 }
569 
570 /* configure a single disk in the array */
571 int
572 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *bf, RF_RaidDisk_t *diskPtr,
573 		 RF_RowCol_t col)
574 {
575 	char   *p;
576 	struct pathbuf *pb;
577 	struct vnode *vp;
578 	int     error;
579 
580 	p = rf_find_non_white(bf);
581 	if (p[strlen(p) - 1] == '\n') {
582 		/* strip off the newline */
583 		p[strlen(p) - 1] = '\0';
584 	}
585 	(void) strcpy(diskPtr->devname, p);
586 
587 	/* Let's start by claiming the component is fine and well... */
588 	diskPtr->status = rf_ds_optimal;
589 
590 	raidPtr->raid_cinfo[col].ci_vp = NULL;
591 	raidPtr->raid_cinfo[col].ci_dev = 0;
592 
593 	if (!strcmp("absent", diskPtr->devname)) {
594 		printf("Ignoring missing component at column %d\n", col);
595 		snprintf(diskPtr->devname, sizeof(diskPtr->devname),
596 		    "component%d", col);
597 		diskPtr->status = rf_ds_failed;
598 		return (0);
599 	}
600 
601 	pb = pathbuf_create(diskPtr->devname);
602 	if (pb == NULL) {
603 		printf("pathbuf_create for device: %s failed!\n",
604 		       diskPtr->devname);
605 		return ENOMEM;
606 	}
607 	error = dk_lookup(pb, curlwp, &vp);
608 	pathbuf_destroy(pb);
609 	if (error) {
610 		printf("dk_lookup on device: %s failed!\n", diskPtr->devname);
611 		if (error == ENXIO) {
612 			/* the component isn't there... must be dead :-( */
613 			diskPtr->status = rf_ds_failed;
614 			return 0;
615 		} else {
616 			return (error);
617 		}
618 	}
619 
620 	if ((error = rf_getdisksize(vp, diskPtr)) != 0)
621 		return (error);
622 
623 	/*
624 	 * If this raidPtr's bytesPerSector is zero, fill it in with this
625 	 * components blockSize.  This will give us something to work with
626 	 * initially, and if it is wrong, we'll get errors later.
627 	 */
628 	if (raidPtr->bytesPerSector == 0)
629 		raidPtr->bytesPerSector = diskPtr->blockSize;
630 
631 	if (diskPtr->status == rf_ds_optimal) {
632 		raidPtr->raid_cinfo[col].ci_vp = vp;
633 		raidPtr->raid_cinfo[col].ci_dev = vp->v_rdev;
634 
635 		/* This component was not automatically configured */
636 		diskPtr->auto_configured = 0;
637 		diskPtr->dev = vp->v_rdev;
638 
639 		/* we allow the user to specify that only a fraction of the
640 		 * disks should be used this is just for debug:  it speeds up
641 		 * the parity scan */
642 		diskPtr->numBlocks = diskPtr->numBlocks *
643 			rf_sizePercentage / 100;
644 	}
645 	return (0);
646 }
647 
648 static void
649 rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name,
650 		      RF_ComponentLabel_t *ci_label)
651 {
652 
653 	printf("raid%d: Component %s being configured at col: %d\n",
654 	       raidPtr->raidid, dev_name, column );
655 	printf("         Column: %d Num Columns: %d\n",
656 	       ci_label->column,
657 	       ci_label->num_columns);
658 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
659 	       ci_label->version, ci_label->serial_number,
660 	       ci_label->mod_counter);
661 	printf("         Clean: %s Status: %d\n",
662 	       ci_label->clean ? "Yes" : "No", ci_label->status );
663 }
664 
665 static int rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column,
666 				 char *dev_name, RF_ComponentLabel_t *ci_label,
667 				 int serial_number, int mod_counter)
668 {
669 	int fatal_error = 0;
670 
671 	if (serial_number != ci_label->serial_number) {
672 		printf("%s has a different serial number: %d %d\n",
673 		       dev_name, serial_number, ci_label->serial_number);
674 		fatal_error = 1;
675 	}
676 	if (mod_counter != ci_label->mod_counter) {
677 		printf("%s has a different modification count: %d %d\n",
678 		       dev_name, mod_counter, ci_label->mod_counter);
679 	}
680 
681 	if (row != ci_label->row) {
682 		printf("Row out of alignment for: %s\n", dev_name);
683 		fatal_error = 1;
684 	}
685 	if (column != ci_label->column) {
686 		printf("Column out of alignment for: %s\n", dev_name);
687 		fatal_error = 1;
688 	}
689 	if (raidPtr->numCol != ci_label->num_columns) {
690 		printf("Number of columns do not match for: %s\n", dev_name);
691 		fatal_error = 1;
692 	}
693 	if (ci_label->clean == 0) {
694 		/* it's not clean, but that's not fatal */
695 		printf("%s is not clean!\n", dev_name);
696 	}
697 	return(fatal_error);
698 }
699 
700 
701 static void
702 rf_handle_hosed(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, int hosed_column,
703     int again)
704 {
705 	printf("Hosed component: %s\n", &cfgPtr->devnames[0][hosed_column][0]);
706 	if (cfgPtr->force)
707 		return;
708 
709 	/* we'll fail this component, as if there are
710 	   other major errors, we aren't forcing things
711 	   and we'll abort the config anyways */
712 	if (again && raidPtr->Disks[hosed_column].status == rf_ds_failed)
713 		return;
714 
715 	raidPtr->Disks[hosed_column].status = rf_ds_failed;
716 	raidPtr->numFailures++;
717 	raidPtr->status = rf_rs_degraded;
718 }
719 
720 /*
721 
722    rf_CheckLabels() - check all the component labels for consistency.
723    Return an error if there is anything major amiss.
724 
725  */
726 
727 int
728 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
729 {
730 	int c;
731 	char *dev_name;
732 	RF_ComponentLabel_t *ci_label;
733 	int serial_number = 0;
734 	int mod_number = 0;
735 	int fatal_error = 0;
736 	int mod_values[4];
737 	int mod_count[4];
738 	int ser_values[4];
739 	int ser_count[4];
740 	int num_ser;
741 	int num_mod;
742 	int i;
743 	int found;
744 	int hosed_column;
745 	int too_fatal;
746 	int parity_good;
747 
748 	hosed_column = -1;
749 	too_fatal = 0;
750 
751 	/*
752 	   We're going to try to be a little intelligent here.  If one
753 	   component's label is bogus, and we can identify that it's the
754 	   *only* one that's gone, we'll mark it as "failed" and allow
755 	   the configuration to proceed.  This will be the *only* case
756 	   that we'll proceed if there would be (otherwise) fatal errors.
757 
758 	   Basically we simply keep a count of how many components had
759 	   what serial number.  If all but one agree, we simply mark
760 	   the disagreeing component as being failed, and allow
761 	   things to come up "normally".
762 
763 	   We do this first for serial numbers, and then for "mod_counter".
764 
765 	 */
766 
767 	num_ser = 0;
768 	num_mod = 0;
769 
770 	ser_values[0] = ser_values[1] = ser_values[2] = ser_values[3] = 0;
771 	ser_count[0] = ser_count[1] = ser_count[2] = ser_count[3] = 0;
772 	mod_values[0] = mod_values[1] = mod_values[2] = mod_values[3] = 0;
773 	mod_count[0] = mod_count[1] = mod_count[2] = mod_count[3] = 0;
774 
775 	for (c = 0; c < raidPtr->numCol; c++) {
776 		if (raidPtr->Disks[c].status != rf_ds_optimal)
777 			continue;
778 		ci_label = raidget_component_label(raidPtr, c);
779 		found=0;
780 		for(i=0;i<num_ser;i++) {
781 			if (ser_values[i] == ci_label->serial_number) {
782 				ser_count[i]++;
783 				found=1;
784 				break;
785 			}
786 		}
787 		if (!found) {
788 			ser_values[num_ser] = ci_label->serial_number;
789 			ser_count[num_ser] = 1;
790 			num_ser++;
791 			if (num_ser>2) {
792 				fatal_error = 1;
793 				break;
794 			}
795 		}
796 		found=0;
797 		for(i=0;i<num_mod;i++) {
798 			if (mod_values[i] == ci_label->mod_counter) {
799 				mod_count[i]++;
800 				found=1;
801 				break;
802 			}
803 		}
804 		if (!found) {
805 			mod_values[num_mod] = ci_label->mod_counter;
806 			mod_count[num_mod] = 1;
807 			num_mod++;
808 			if (num_mod>2) {
809 				fatal_error = 1;
810 				break;
811 			}
812 		}
813 	}
814 #if DEBUG
815 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
816 	for(i=0;i<num_ser;i++) {
817 		printf("%d %d\n", ser_values[i], ser_count[i]);
818 	}
819 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
820 	for(i=0;i<num_mod;i++) {
821 		printf("%d %d\n", mod_values[i], mod_count[i]);
822 	}
823 #endif
824 	serial_number = ser_values[0];
825 	if (num_ser == 2) {
826 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
827 			/* Locate the maverick component */
828 			if (ser_count[1] > ser_count[0]) {
829 				serial_number = ser_values[1];
830 			}
831 
832 			for (c = 0; c < raidPtr->numCol; c++) {
833 				if (raidPtr->Disks[c].status != rf_ds_optimal)
834 					continue;
835 				ci_label = raidget_component_label(raidPtr, c);
836 				if (serial_number != ci_label->serial_number) {
837 					hosed_column = c;
838 					break;
839 				}
840 			}
841 			if (hosed_column != -1)
842 				rf_handle_hosed(raidPtr, cfgPtr, hosed_column,
843 				    0);
844 		} else {
845 			too_fatal = 1;
846 		}
847 		if (cfgPtr->parityConfig == '0') {
848 			/* We've identified two different serial numbers.
849 			   RAID 0 can't cope with that, so we'll punt */
850 			too_fatal = 1;
851 		}
852 
853 	}
854 
855 	/* record the serial number for later.  If we bail later, setting
856 	   this doesn't matter, otherwise we've got the best guess at the
857 	   correct serial number */
858 	raidPtr->serial_number = serial_number;
859 
860 	mod_number = mod_values[0];
861 	if (num_mod == 2) {
862 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
863 			/* Locate the maverick component */
864 			if (mod_count[1] > mod_count[0]) {
865 				mod_number = mod_values[1];
866 			} else if (mod_count[1] < mod_count[0]) {
867 				mod_number = mod_values[0];
868 			} else {
869 				/* counts of different modification values
870 				   are the same.   Assume greater value is
871 				   the correct one, all other things
872 				   considered */
873 				if (mod_values[0] > mod_values[1]) {
874 					mod_number = mod_values[0];
875 				} else {
876 					mod_number = mod_values[1];
877 				}
878 
879 			}
880 
881 			for (c = 0; c < raidPtr->numCol; c++) {
882 				if (raidPtr->Disks[c].status != rf_ds_optimal)
883 					continue;
884 
885 				ci_label = raidget_component_label(raidPtr, c);
886 				if (mod_number != ci_label->mod_counter) {
887 					if (hosed_column == c) {
888 						/* same one.  Can
889 						   deal with it.  */
890 					} else {
891 						hosed_column = c;
892 						if (num_ser != 1) {
893 							too_fatal = 1;
894 							break;
895 						}
896 					}
897 				}
898 			}
899 			if (hosed_column != -1)
900 				rf_handle_hosed(raidPtr, cfgPtr, hosed_column,
901 				    1);
902 		} else {
903 			too_fatal = 1;
904 		}
905 		if (cfgPtr->parityConfig == '0') {
906 			/* We've identified two different mod counters.
907 			   RAID 0 can't cope with that, so we'll punt */
908 			too_fatal = 1;
909 		}
910 	}
911 
912 	raidPtr->mod_counter = mod_number;
913 
914 	if (too_fatal) {
915 		/* we've had both a serial number mismatch, and a mod_counter
916 		   mismatch -- and they involved two different components!!
917 		   Bail -- make things fail so that the user must force
918 		   the issue... */
919 		hosed_column = -1;
920 		fatal_error = 1;
921 	}
922 
923 	if (num_ser > 2) {
924 		printf("raid%d: Too many different serial numbers!\n",
925 		       raidPtr->raidid);
926 		fatal_error = 1;
927 	}
928 
929 	if (num_mod > 2) {
930 		printf("raid%d: Too many different mod counters!\n",
931 		       raidPtr->raidid);
932 		fatal_error = 1;
933 	}
934 
935         for (c = 0; c < raidPtr->numCol; c++) {
936 		if (raidPtr->Disks[c].status != rf_ds_optimal) {
937 			hosed_column = c;
938 			break;
939 		}
940 	}
941 
942 	/* we start by assuming the parity will be good, and flee from
943 	   that notion at the slightest sign of trouble */
944 
945 	parity_good = RF_RAID_CLEAN;
946 
947 	for (c = 0; c < raidPtr->numCol; c++) {
948 		dev_name = &cfgPtr->devnames[0][c][0];
949 		ci_label = raidget_component_label(raidPtr, c);
950 
951 		if (c == hosed_column) {
952 			printf("raid%d: Ignoring %s\n",
953 			       raidPtr->raidid, dev_name);
954 		} else {
955 			rf_print_label_status( raidPtr, c, dev_name, ci_label);
956 			if (rf_check_label_vitals( raidPtr, 0, c,
957 						   dev_name, ci_label,
958 						   serial_number,
959 						   mod_number )) {
960 				fatal_error = 1;
961 			}
962 			if (ci_label->clean != RF_RAID_CLEAN) {
963 				parity_good = RF_RAID_DIRTY;
964 			}
965 		}
966 	}
967 
968 	if (fatal_error) {
969 		parity_good = RF_RAID_DIRTY;
970 	}
971 
972 	/* we note the state of the parity */
973 	raidPtr->parity_good = parity_good;
974 
975 	return(fatal_error);
976 }
977 
978 int
979 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
980 {
981 	RF_RaidDisk_t *disks;
982 	RF_DiskQueue_t *spareQueues;
983 	int ret;
984 	unsigned int bs;
985 	int spare_number;
986 
987 	ret=0;
988 
989 	if (raidPtr->numSpare >= RF_MAXSPARE) {
990 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
991 		return(EINVAL);
992 	}
993 
994 	rf_lock_mutex2(raidPtr->mutex);
995 	while (raidPtr->adding_hot_spare == 1) {
996 		rf_wait_cond2(raidPtr->adding_hot_spare_cv, raidPtr->mutex);
997 	}
998 	raidPtr->adding_hot_spare = 1;
999 	rf_unlock_mutex2(raidPtr->mutex);
1000 
1001 	/* the beginning of the spares... */
1002 	disks = &raidPtr->Disks[raidPtr->numCol];
1003 
1004 	spare_number = raidPtr->numSpare;
1005 
1006 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
1007 			       &disks[spare_number],
1008 			       raidPtr->numCol + spare_number);
1009 
1010 	if (ret)
1011 		goto fail;
1012 	if (disks[spare_number].status != rf_ds_optimal) {
1013 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
1014 			     sparePtr->component_name);
1015 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
1016 		ret=EINVAL;
1017 		goto fail;
1018 	} else {
1019 		disks[spare_number].status = rf_ds_spare;
1020 		DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
1021 			 spare_number,
1022 			 disks[spare_number].devname,
1023 			 disks[spare_number].numBlocks,
1024 			 disks[spare_number].blockSize,
1025 			 (long int) disks[spare_number].numBlocks *
1026 			 disks[spare_number].blockSize / 1024 / 1024);
1027 	}
1028 
1029 
1030 	/* check sizes and block sizes on the spare disk */
1031 	bs = 1 << raidPtr->logBytesPerSector;
1032 	if (disks[spare_number].blockSize != bs) {
1033 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
1034 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
1035 		ret = EINVAL;
1036 		goto fail;
1037 	}
1038 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1039 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
1040 			     disks[spare_number].devname,
1041 			     disks[spare_number].blockSize,
1042 			     raidPtr->sectorsPerDisk);
1043 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
1044 		ret = EINVAL;
1045 		goto fail;
1046 	} else {
1047 		if (disks[spare_number].numBlocks >
1048 		    raidPtr->sectorsPerDisk) {
1049 			RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
1050 			    disks[spare_number].devname,
1051 			    raidPtr->sectorsPerDisk,
1052 			    disks[spare_number].numBlocks);
1053 
1054 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1055 		}
1056 	}
1057 
1058 	spareQueues = &raidPtr->Queues[raidPtr->numCol];
1059 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1060 				 raidPtr->numCol + spare_number,
1061 				 raidPtr->qType,
1062 				 raidPtr->sectorsPerDisk,
1063 				 raidPtr->Disks[raidPtr->numCol +
1064 						  spare_number].dev,
1065 				 raidPtr->maxOutstanding,
1066 				 &raidPtr->shutdownList,
1067 				 raidPtr->cleanupList);
1068 
1069 	rf_lock_mutex2(raidPtr->mutex);
1070 	raidPtr->numSpare++;
1071 	rf_unlock_mutex2(raidPtr->mutex);
1072 
1073 fail:
1074 	rf_lock_mutex2(raidPtr->mutex);
1075 	raidPtr->adding_hot_spare = 0;
1076 	rf_signal_cond2(raidPtr->adding_hot_spare_cv);
1077 	rf_unlock_mutex2(raidPtr->mutex);
1078 
1079 	return(ret);
1080 }
1081 
1082 int
1083 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
1084 {
1085 #if 0
1086 	int spare_number;
1087 #endif
1088 
1089 	if (raidPtr->numSpare==0) {
1090 		printf("No spares to remove!\n");
1091 		return(EINVAL);
1092 	}
1093 
1094 	return(EINVAL); /* XXX not implemented yet */
1095 #if 0
1096 	spare_number = sparePtr->column;
1097 
1098 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1099 		return(EINVAL);
1100 	}
1101 
1102 	/* verify that this spare isn't in use... */
1103 
1104 
1105 
1106 
1107 	/* it's gone.. */
1108 
1109 	raidPtr->numSpare--;
1110 
1111 	return(0);
1112 #endif
1113 }
1114 
1115 
1116 int
1117 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
1118 {
1119 #if 0
1120 	RF_RaidDisk_t *disks;
1121 #endif
1122 
1123 	if ((component->column < 0) ||
1124 	    (component->column >= raidPtr->numCol)) {
1125 		return(EINVAL);
1126 	}
1127 
1128 #if 0
1129 	disks = &raidPtr->Disks[component->column];
1130 #endif
1131 
1132 	/* 1. This component must be marked as 'failed' */
1133 
1134 	return(EINVAL); /* Not implemented yet. */
1135 }
1136 
1137 int
1138 rf_incorporate_hot_spare(RF_Raid_t *raidPtr,
1139     RF_SingleComponent_t *component)
1140 {
1141 
1142 	/* Issues here include how to 'move' this in if there is IO
1143 	   taking place (e.g. component queues and such) */
1144 
1145 	return(EINVAL); /* Not implemented yet. */
1146 }
1147