xref: /netbsd-src/sys/dev/raidframe/rf_disks.c (revision b1c86f5f087524e68db12794ee9c3e3da1ab17a0)
1 /*	$NetBSD: rf_disks.c,v 1.73 2010/03/01 21:10:26 jld Exp $	*/
2 /*-
3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to The NetBSD Foundation
7  * by Greg Oster
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 /*
32  * Copyright (c) 1995 Carnegie-Mellon University.
33  * All rights reserved.
34  *
35  * Author: Mark Holland
36  *
37  * Permission to use, copy, modify and distribute this software and
38  * its documentation is hereby granted, provided that both the copyright
39  * notice and this permission notice appear in all copies of the
40  * software, derivative works or modified versions, and any portions
41  * thereof, and that both notices appear in supporting documentation.
42  *
43  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
44  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
45  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46  *
47  * Carnegie Mellon requests users of this software to return to
48  *
49  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
50  *  School of Computer Science
51  *  Carnegie Mellon University
52  *  Pittsburgh PA 15213-3890
53  *
54  * any improvements or extensions that they make and grant Carnegie the
55  * rights to redistribute these changes.
56  */
57 
58 /***************************************************************
59  * rf_disks.c -- code to perform operations on the actual disks
60  ***************************************************************/
61 
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.73 2010/03/01 21:10:26 jld Exp $");
64 
65 #include <dev/raidframe/raidframevar.h>
66 
67 #include "rf_raid.h"
68 #include "rf_alloclist.h"
69 #include "rf_utils.h"
70 #include "rf_general.h"
71 #include "rf_options.h"
72 #include "rf_kintf.h"
73 #include "rf_netbsd.h"
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/proc.h>
78 #include <sys/ioctl.h>
79 #include <sys/fcntl.h>
80 #include <sys/vnode.h>
81 #include <sys/kauth.h>
82 
83 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
84 static void rf_print_label_status( RF_Raid_t *, int, char *,
85 				  RF_ComponentLabel_t *);
86 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
87 				  RF_ComponentLabel_t *, int, int );
88 
/*
 * Debug printf wrappers: the message is emitted only when rf_diskDebug is
 * non-zero.  The number in the name is the total argument count (format
 * string included).
 *
 * The bodies are wrapped in do { } while (0) so that each invocation is a
 * single statement; a bare "if (...) printf(...)" expansion would capture
 * the "else" of an enclosing unbraced if/else.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
91 
92 /**************************************************************************
93  *
94  * initialize the disks comprising the array
95  *
96  * We want the spare disks to have regular row,col numbers so that we can
97  * easily substitue a spare for a failed disk.  But, the driver code assumes
98  * throughout that the array contains numRow by numCol _non-spare_ disks, so
99  * it's not clear how to fit in the spares.  This is an unfortunate holdover
100  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
101  * rest, and put all the spares in it.  This probably needs to get changed
102  * eventually.
103  *
104  **************************************************************************/
105 
106 int
107 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
108 		  RF_Config_t *cfgPtr)
109 {
110 	RF_RaidDisk_t *disks;
111 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
112 	RF_RowCol_t c;
113 	int bs, ret;
114 	unsigned i, count, foundone = 0, numFailuresThisRow;
115 	int force;
116 
117 	force = cfgPtr->force;
118 
119 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
120 	if (ret)
121 		goto fail;
122 
123 	disks = raidPtr->Disks;
124 
125 	numFailuresThisRow = 0;
126 	for (c = 0; c < raidPtr->numCol; c++) {
127 		ret = rf_ConfigureDisk(raidPtr,
128 				       &cfgPtr->devnames[0][c][0],
129 				       &disks[c], c);
130 
131 		if (ret)
132 			goto fail;
133 
134 		if (disks[c].status == rf_ds_optimal) {
135 			ret = raidfetch_component_label(raidPtr, c);
136 			if (ret)
137 				goto fail;
138 		}
139 
140 		if (disks[c].status != rf_ds_optimal) {
141 			numFailuresThisRow++;
142 		} else {
143 			if (disks[c].numBlocks < min_numblks)
144 				min_numblks = disks[c].numBlocks;
145 			DPRINTF6("Disk at col %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
146 				 c, disks[c].devname,
147 				 disks[c].numBlocks,
148 				 disks[c].blockSize,
149 				 (long int) disks[c].numBlocks *
150 				 disks[c].blockSize / 1024 / 1024);
151 		}
152 	}
153 	/* XXX fix for n-fault tolerant */
154 	/* XXX this should probably check to see how many failures
155 	   we can handle for this configuration! */
156 	if (numFailuresThisRow > 0)
157 		raidPtr->status = rf_rs_degraded;
158 
159 	/* all disks must be the same size & have the same block size, bs must
160 	 * be a power of 2 */
161 	bs = 0;
162 	foundone = 0;
163 	for (c = 0; c < raidPtr->numCol; c++) {
164 		if (disks[c].status == rf_ds_optimal) {
165 			bs = disks[c].blockSize;
166 			foundone = 1;
167 			break;
168 		}
169 	}
170 	if (!foundone) {
171 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
172 		ret = EINVAL;
173 		goto fail;
174 	}
175 	for (count = 0, i = 1; i; i <<= 1)
176 		if (bs & i)
177 			count++;
178 	if (count != 1) {
179 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
180 		ret = EINVAL;
181 		goto fail;
182 	}
183 
184 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
185 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
186 		if (force != 0) {
187 			printf("raid%d: Fatal errors being ignored.\n",
188 			       raidPtr->raidid);
189 		} else {
190 			ret = EINVAL;
191 			goto fail;
192 		}
193 	}
194 
195 	for (c = 0; c < raidPtr->numCol; c++) {
196 		if (disks[c].status == rf_ds_optimal) {
197 			if (disks[c].blockSize != bs) {
198 				RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c);
199 				ret = EINVAL;
200 				goto fail;
201 			}
202 			if (disks[c].numBlocks != min_numblks) {
203 				RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n",
204 					     c, (int) min_numblks);
205 				disks[c].numBlocks = min_numblks;
206 			}
207 		}
208 	}
209 
210 	raidPtr->sectorsPerDisk = min_numblks;
211 	raidPtr->logBytesPerSector = ffs(bs) - 1;
212 	raidPtr->bytesPerSector = bs;
213 	raidPtr->sectorMask = bs - 1;
214 	return (0);
215 
216 fail:
217 
218 	rf_UnconfigureVnodes( raidPtr );
219 
220 	return (ret);
221 }
222 
223 
224 /****************************************************************************
225  * set up the data structures describing the spare disks in the array
226  * recall from the above comment that the spare disk descriptors are stored
227  * in row zero, which is specially expanded to hold them.
228  ****************************************************************************/
229 int
230 rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
231 		       RF_Config_t *cfgPtr)
232 {
233 	int     i, ret;
234 	unsigned int bs;
235 	RF_RaidDisk_t *disks;
236 	int     num_spares_done;
237 
238 	num_spares_done = 0;
239 
240 	/* The space for the spares should have already been allocated by
241 	 * ConfigureDisks() */
242 
243 	disks = &raidPtr->Disks[raidPtr->numCol];
244 	for (i = 0; i < raidPtr->numSpare; i++) {
245 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
246 				       &disks[i], raidPtr->numCol + i);
247 		if (ret)
248 			goto fail;
249 		if (disks[i].status != rf_ds_optimal) {
250 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
251 				     &cfgPtr->spare_names[i][0]);
252 		} else {
253 			disks[i].status = rf_ds_spare;	/* change status to
254 							 * spare */
255 			DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", i,
256 			    disks[i].devname,
257 			    disks[i].numBlocks, disks[i].blockSize,
258 			    (long int) disks[i].numBlocks *
259 				 disks[i].blockSize / 1024 / 1024);
260 		}
261 		num_spares_done++;
262 	}
263 
264 	/* check sizes and block sizes on spare disks */
265 	bs = 1 << raidPtr->logBytesPerSector;
266 	for (i = 0; i < raidPtr->numSpare; i++) {
267 		if (disks[i].blockSize != bs) {
268 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
269 			ret = EINVAL;
270 			goto fail;
271 		}
272 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
273 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
274 				     disks[i].devname, disks[i].blockSize,
275 				     raidPtr->sectorsPerDisk);
276 			ret = EINVAL;
277 			goto fail;
278 		} else
279 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
280 				RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
281 				    disks[i].devname,
282 				    raidPtr->sectorsPerDisk,
283 				    disks[i].numBlocks);
284 
285 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
286 			}
287 	}
288 
289 	return (0);
290 
291 fail:
292 
293 	/* Release the hold on the main components.  We've failed to allocate
294 	 * a spare, and since we're failing, we need to free things..
295 
296 	 XXX failing to allocate a spare is *not* that big of a deal...
297 	 We *can* survive without it, if need be, esp. if we get hot
298 	 adding working.
299 
300 	 If we don't fail out here, then we need a way to remove this spare...
301 	 that should be easier to do here than if we are "live"...
302 
303 	 */
304 
305 	rf_UnconfigureVnodes( raidPtr );
306 
307 	return (ret);
308 }
309 
310 static int
311 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
312 {
313 	int ret;
314 
315 	/* We allocate RF_MAXSPARE on the first row so that we
316 	   have room to do hot-swapping of spares */
317 	RF_MallocAndAdd(raidPtr->Disks, (raidPtr->numCol + RF_MAXSPARE) *
318 			sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
319 			raidPtr->cleanupList);
320 	if (raidPtr->Disks == NULL) {
321 		ret = ENOMEM;
322 		goto fail;
323 	}
324 
325 	/* get space for device specific stuff.. */
326 	RF_MallocAndAdd(raidPtr->raid_cinfo,
327 			(raidPtr->numCol + RF_MAXSPARE) *
328 			sizeof(struct raidcinfo), (struct raidcinfo *),
329 			raidPtr->cleanupList);
330 
331 	if (raidPtr->raid_cinfo == NULL) {
332 		ret = ENOMEM;
333 		goto fail;
334 	}
335 
336 	return(0);
337 fail:
338 	rf_UnconfigureVnodes( raidPtr );
339 
340 	return(ret);
341 }
342 
343 
344 /* configure a single disk during auto-configuration at boot */
345 int
346 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
347 		      RF_AutoConfig_t *auto_config)
348 {
349 	RF_RaidDisk_t *disks;
350 	RF_RaidDisk_t *diskPtr;
351 	RF_RowCol_t c;
352 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
353 	int bs, ret;
354 	int numFailuresThisRow;
355 	RF_AutoConfig_t *ac;
356 	int parity_good;
357 	int mod_counter;
358 	int mod_counter_found;
359 
360 #if DEBUG
361 	printf("Starting autoconfiguration of RAID set...\n");
362 #endif
363 
364 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
365 	if (ret)
366 		goto fail;
367 
368 	disks = raidPtr->Disks;
369 
370 	/* assume the parity will be fine.. */
371 	parity_good = RF_RAID_CLEAN;
372 
373 	/* Check for mod_counters that are too low */
374 	mod_counter_found = 0;
375 	mod_counter = 0;
376 	ac = auto_config;
377 	while(ac!=NULL) {
378 		if (mod_counter_found==0) {
379 			mod_counter = ac->clabel->mod_counter;
380 			mod_counter_found = 1;
381 		} else {
382 			if (ac->clabel->mod_counter > mod_counter) {
383 				mod_counter = ac->clabel->mod_counter;
384 			}
385 		}
386 		ac->flag = 0; /* clear the general purpose flag */
387 		ac = ac->next;
388 	}
389 
390 	bs = 0;
391 
392 	numFailuresThisRow = 0;
393 	for (c = 0; c < raidPtr->numCol; c++) {
394 		diskPtr = &disks[c];
395 
396 		/* find this row/col in the autoconfig */
397 #if DEBUG
398 		printf("Looking for %d in autoconfig\n",c);
399 #endif
400 		ac = auto_config;
401 		while(ac!=NULL) {
402 			if (ac->clabel==NULL) {
403 				/* big-time bad news. */
404 				goto fail;
405 			}
406 			if ((ac->clabel->column == c) &&
407 			    (ac->clabel->mod_counter == mod_counter)) {
408 				/* it's this one... */
409 				/* flag it as 'used', so we don't
410 				   free it later. */
411 				ac->flag = 1;
412 #if DEBUG
413 				printf("Found: %s at %d\n",
414 				       ac->devname,c);
415 #endif
416 
417 				break;
418 			}
419 			ac=ac->next;
420 		}
421 
422 		if (ac==NULL) {
423 			/* we didn't find an exact match with a
424 			   correct mod_counter above... can we find
425 			   one with an incorrect mod_counter to use
426 			   instead?  (this one, if we find it, will be
427 			   marked as failed once the set configures)
428 			*/
429 
430 			ac = auto_config;
431 			while(ac!=NULL) {
432 				if (ac->clabel==NULL) {
433 					/* big-time bad news. */
434 					goto fail;
435 				}
436 				if (ac->clabel->column == c) {
437 					/* it's this one...
438 					   flag it as 'used', so we
439 					   don't free it later. */
440 					ac->flag = 1;
441 #if DEBUG
442 					printf("Found(low mod_counter): %s at %d\n",
443 					       ac->devname,c);
444 #endif
445 
446 					break;
447 				}
448 				ac=ac->next;
449 			}
450 		}
451 
452 
453 
454 		if (ac!=NULL) {
455 			/* Found it.  Configure it.. */
456 			diskPtr->blockSize = ac->clabel->blockSize;
457 			diskPtr->numBlocks = ac->clabel->numBlocks;
458 			/* Note: rf_protectedSectors is already
459 			   factored into numBlocks here */
460 			raidPtr->raid_cinfo[c].ci_vp = ac->vp;
461 			raidPtr->raid_cinfo[c].ci_dev = ac->dev;
462 
463 			memcpy(raidget_component_label(raidPtr, c),
464 			    ac->clabel, sizeof(*ac->clabel));
465 			snprintf(diskPtr->devname, sizeof(diskPtr->devname),
466 			    "/dev/%s", ac->devname);
467 
468 			/* note the fact that this component was
469 			   autoconfigured.  You'll need this info
470 			   later.  Trust me :) */
471 			diskPtr->auto_configured = 1;
472 			diskPtr->dev = ac->dev;
473 
474 			/*
475 			 * we allow the user to specify that
476 			 * only a fraction of the disks should
477 			 * be used this is just for debug: it
478 			 * speeds up the parity scan
479 			 */
480 
481 			diskPtr->numBlocks = diskPtr->numBlocks *
482 				rf_sizePercentage / 100;
483 
484 			/* XXX these will get set multiple times,
485 			   but since we're autoconfiguring, they'd
486 			   better be always the same each time!
487 			   If not, this is the least of your worries */
488 
489 			bs = diskPtr->blockSize;
490 			min_numblks = diskPtr->numBlocks;
491 
492 			/* this gets done multiple times, but that's
493 			   fine -- the serial number will be the same
494 			   for all components, guaranteed */
495 			raidPtr->serial_number = ac->clabel->serial_number;
496 			/* check the last time the label was modified */
497 
498 			if (ac->clabel->mod_counter != mod_counter) {
499 				/* Even though we've filled in all of
500 				   the above, we don't trust this
501 				   component since it's modification
502 				   counter is not in sync with the
503 				   rest, and we really consider it to
504 				   be failed.  */
505 				disks[c].status = rf_ds_failed;
506 				numFailuresThisRow++;
507 			} else {
508 				if (ac->clabel->clean != RF_RAID_CLEAN) {
509 					parity_good = RF_RAID_DIRTY;
510 				}
511 			}
512 		} else {
513 			/* Didn't find it at all!!  Component must
514 			   really be dead */
515 			disks[c].status = rf_ds_failed;
516 			snprintf(disks[c].devname, sizeof(disks[c].devname),
517 			    "component%d", c);
518 			numFailuresThisRow++;
519 		}
520 	}
521 	/* XXX fix for n-fault tolerant */
522 	/* XXX this should probably check to see how many failures
523 	   we can handle for this configuration! */
524 	if (numFailuresThisRow > 0) {
525 		raidPtr->status = rf_rs_degraded;
526 		raidPtr->numFailures = numFailuresThisRow;
527 	}
528 
529 	/* close the device for the ones that didn't get used */
530 
531 	ac = auto_config;
532 	while(ac!=NULL) {
533 		if (ac->flag == 0) {
534 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
535 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
536 			vput(ac->vp);
537 			ac->vp = NULL;
538 #if DEBUG
539 			printf("Released %s from auto-config set.\n",
540 			       ac->devname);
541 #endif
542 		}
543 		ac = ac->next;
544 	}
545 
546 	raidPtr->mod_counter = mod_counter;
547 
548 	/* note the state of the parity, if any */
549 	raidPtr->parity_good = parity_good;
550 	raidPtr->sectorsPerDisk = min_numblks;
551 	raidPtr->logBytesPerSector = ffs(bs) - 1;
552 	raidPtr->bytesPerSector = bs;
553 	raidPtr->sectorMask = bs - 1;
554 	return (0);
555 
556 fail:
557 
558 	rf_UnconfigureVnodes( raidPtr );
559 
560 	return (ret);
561 
562 }
563 
564 /* configure a single disk in the array */
565 int
566 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *bf, RF_RaidDisk_t *diskPtr,
567 		 RF_RowCol_t col)
568 {
569 	char   *p;
570 	struct vnode *vp;
571 	struct vattr va;
572 	int     error;
573 
574 	p = rf_find_non_white(bf);
575 	if (p[strlen(p) - 1] == '\n') {
576 		/* strip off the newline */
577 		p[strlen(p) - 1] = '\0';
578 	}
579 	(void) strcpy(diskPtr->devname, p);
580 
581 	/* Let's start by claiming the component is fine and well... */
582 	diskPtr->status = rf_ds_optimal;
583 
584 	raidPtr->raid_cinfo[col].ci_vp = NULL;
585 	raidPtr->raid_cinfo[col].ci_dev = 0;
586 
587 	if (!strcmp("absent", diskPtr->devname)) {
588 		printf("Ignoring missing component at column %d\n", col);
589 		sprintf(diskPtr->devname, "component%d", col);
590 		diskPtr->status = rf_ds_failed;
591 		return (0);
592 	}
593 
594 	error = dk_lookup(diskPtr->devname, curlwp, &vp, UIO_SYSSPACE);
595 	if (error) {
596 		printf("dk_lookup on device: %s failed!\n", diskPtr->devname);
597 		if (error == ENXIO) {
598 			/* the component isn't there... must be dead :-( */
599 			diskPtr->status = rf_ds_failed;
600 		} else {
601 			return (error);
602 		}
603 	}
604 	if (diskPtr->status == rf_ds_optimal) {
605 
606 		if ((error = VOP_GETATTR(vp, &va, curlwp->l_cred)) != 0)
607 			return (error);
608 		if ((error = rf_getdisksize(vp, curlwp, diskPtr)) != 0)
609 			return (error);
610 
611 		raidPtr->raid_cinfo[col].ci_vp = vp;
612 		raidPtr->raid_cinfo[col].ci_dev = va.va_rdev;
613 
614 		/* This component was not automatically configured */
615 		diskPtr->auto_configured = 0;
616 		diskPtr->dev = va.va_rdev;
617 
618 		/* we allow the user to specify that only a fraction of the
619 		 * disks should be used this is just for debug:  it speeds up
620 		 * the parity scan */
621 		diskPtr->numBlocks = diskPtr->numBlocks *
622 			rf_sizePercentage / 100;
623 	}
624 	return (0);
625 }
626 
627 static void
628 rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name,
629 		      RF_ComponentLabel_t *ci_label)
630 {
631 
632 	printf("raid%d: Component %s being configured at col: %d\n",
633 	       raidPtr->raidid, dev_name, column );
634 	printf("         Column: %d Num Columns: %d\n",
635 	       ci_label->column,
636 	       ci_label->num_columns);
637 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
638 	       ci_label->version, ci_label->serial_number,
639 	       ci_label->mod_counter);
640 	printf("         Clean: %s Status: %d\n",
641 	       ci_label->clean ? "Yes" : "No", ci_label->status );
642 }
643 
644 static int rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column,
645 				 char *dev_name, RF_ComponentLabel_t *ci_label,
646 				 int serial_number, int mod_counter)
647 {
648 	int fatal_error = 0;
649 
650 	if (serial_number != ci_label->serial_number) {
651 		printf("%s has a different serial number: %d %d\n",
652 		       dev_name, serial_number, ci_label->serial_number);
653 		fatal_error = 1;
654 	}
655 	if (mod_counter != ci_label->mod_counter) {
656 		printf("%s has a different modification count: %d %d\n",
657 		       dev_name, mod_counter, ci_label->mod_counter);
658 	}
659 
660 	if (row != ci_label->row) {
661 		printf("Row out of alignment for: %s\n", dev_name);
662 		fatal_error = 1;
663 	}
664 	if (column != ci_label->column) {
665 		printf("Column out of alignment for: %s\n", dev_name);
666 		fatal_error = 1;
667 	}
668 	if (raidPtr->numCol != ci_label->num_columns) {
669 		printf("Number of columns do not match for: %s\n", dev_name);
670 		fatal_error = 1;
671 	}
672 	if (ci_label->clean == 0) {
673 		/* it's not clean, but that's not fatal */
674 		printf("%s is not clean!\n", dev_name);
675 	}
676 	return(fatal_error);
677 }
678 
679 
680 /*
681 
682    rf_CheckLabels() - check all the component labels for consistency.
683    Return an error if there is anything major amiss.
684 
685  */
686 
687 int
688 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
689 {
690 	int c;
691 	char *dev_name;
692 	RF_ComponentLabel_t *ci_label;
693 	int serial_number = 0;
694 	int mod_number = 0;
695 	int fatal_error = 0;
696 	int mod_values[4];
697 	int mod_count[4];
698 	int ser_values[4];
699 	int ser_count[4];
700 	int num_ser;
701 	int num_mod;
702 	int i;
703 	int found;
704 	int hosed_column;
705 	int too_fatal;
706 	int parity_good;
707 	int force;
708 
709 	hosed_column = -1;
710 	too_fatal = 0;
711 	force = cfgPtr->force;
712 
713 	/*
714 	   We're going to try to be a little intelligent here.  If one
715 	   component's label is bogus, and we can identify that it's the
716 	   *only* one that's gone, we'll mark it as "failed" and allow
717 	   the configuration to proceed.  This will be the *only* case
718 	   that we'll proceed if there would be (otherwise) fatal errors.
719 
720 	   Basically we simply keep a count of how many components had
721 	   what serial number.  If all but one agree, we simply mark
722 	   the disagreeing component as being failed, and allow
723 	   things to come up "normally".
724 
725 	   We do this first for serial numbers, and then for "mod_counter".
726 
727 	 */
728 
729 	num_ser = 0;
730 	num_mod = 0;
731 
732 	for (c = 0; c < raidPtr->numCol; c++) {
733 		ci_label = raidget_component_label(raidPtr, c);
734 		found=0;
735 		for(i=0;i<num_ser;i++) {
736 			if (ser_values[i] == ci_label->serial_number) {
737 				ser_count[i]++;
738 				found=1;
739 				break;
740 			}
741 		}
742 		if (!found) {
743 			ser_values[num_ser] = ci_label->serial_number;
744 			ser_count[num_ser] = 1;
745 			num_ser++;
746 			if (num_ser>2) {
747 				fatal_error = 1;
748 				break;
749 			}
750 		}
751 		found=0;
752 		for(i=0;i<num_mod;i++) {
753 			if (mod_values[i] == ci_label->mod_counter) {
754 				mod_count[i]++;
755 				found=1;
756 				break;
757 			}
758 		}
759 		if (!found) {
760 			mod_values[num_mod] = ci_label->mod_counter;
761 			mod_count[num_mod] = 1;
762 			num_mod++;
763 			if (num_mod>2) {
764 				fatal_error = 1;
765 				break;
766 			}
767 		}
768 	}
769 #if DEBUG
770 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
771 	for(i=0;i<num_ser;i++) {
772 		printf("%d %d\n", ser_values[i], ser_count[i]);
773 	}
774 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
775 	for(i=0;i<num_mod;i++) {
776 		printf("%d %d\n", mod_values[i], mod_count[i]);
777 	}
778 #endif
779 	serial_number = ser_values[0];
780 	if (num_ser == 2) {
781 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
782 			/* Locate the maverick component */
783 			if (ser_count[1] > ser_count[0]) {
784 				serial_number = ser_values[1];
785 			}
786 
787 			for (c = 0; c < raidPtr->numCol; c++) {
788 				ci_label = raidget_component_label(raidPtr, c);
789 				if (serial_number != ci_label->serial_number) {
790 					hosed_column = c;
791 					break;
792 				}
793 			}
794 			printf("Hosed component: %s\n",
795 			       &cfgPtr->devnames[0][hosed_column][0]);
796 			if (!force) {
797 				/* we'll fail this component, as if there are
798 				   other major errors, we arn't forcing things
799 				   and we'll abort the config anyways */
800 				raidPtr->Disks[hosed_column].status
801 					= rf_ds_failed;
802 				raidPtr->numFailures++;
803 				raidPtr->status = rf_rs_degraded;
804 			}
805 		} else {
806 			too_fatal = 1;
807 		}
808 		if (cfgPtr->parityConfig == '0') {
809 			/* We've identified two different serial numbers.
810 			   RAID 0 can't cope with that, so we'll punt */
811 			too_fatal = 1;
812 		}
813 
814 	}
815 
816 	/* record the serial number for later.  If we bail later, setting
817 	   this doesn't matter, otherwise we've got the best guess at the
818 	   correct serial number */
819 	raidPtr->serial_number = serial_number;
820 
821 	mod_number = mod_values[0];
822 	if (num_mod == 2) {
823 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
824 			/* Locate the maverick component */
825 			if (mod_count[1] > mod_count[0]) {
826 				mod_number = mod_values[1];
827 			} else if (mod_count[1] < mod_count[0]) {
828 				mod_number = mod_values[0];
829 			} else {
830 				/* counts of different modification values
831 				   are the same.   Assume greater value is
832 				   the correct one, all other things
833 				   considered */
834 				if (mod_values[0] > mod_values[1]) {
835 					mod_number = mod_values[0];
836 				} else {
837 					mod_number = mod_values[1];
838 				}
839 
840 			}
841 
842 			for (c = 0; c < raidPtr->numCol; c++) {
843 				ci_label = raidget_component_label(raidPtr, c);
844 				if (mod_number != ci_label->mod_counter) {
845 					if (hosed_column == c) {
846 						/* same one.  Can
847 						   deal with it.  */
848 					} else {
849 						hosed_column = c;
850 						if (num_ser != 1) {
851 							too_fatal = 1;
852 							break;
853 						}
854 					}
855 				}
856 			}
857 			printf("Hosed component: %s\n",
858 			       &cfgPtr->devnames[0][hosed_column][0]);
859 			if (!force) {
860 				/* we'll fail this component, as if there are
861 				   other major errors, we arn't forcing things
862 				   and we'll abort the config anyways */
863 				if (raidPtr->Disks[hosed_column].status != rf_ds_failed) {
864 					raidPtr->Disks[hosed_column].status
865 						= rf_ds_failed;
866 					raidPtr->numFailures++;
867 					raidPtr->status = rf_rs_degraded;
868 				}
869 			}
870 		} else {
871 			too_fatal = 1;
872 		}
873 		if (cfgPtr->parityConfig == '0') {
874 			/* We've identified two different mod counters.
875 			   RAID 0 can't cope with that, so we'll punt */
876 			too_fatal = 1;
877 		}
878 	}
879 
880 	raidPtr->mod_counter = mod_number;
881 
882 	if (too_fatal) {
883 		/* we've had both a serial number mismatch, and a mod_counter
884 		   mismatch -- and they involved two different components!!
885 		   Bail -- make things fail so that the user must force
886 		   the issue... */
887 		hosed_column = -1;
888 		fatal_error = 1;
889 	}
890 
891 	if (num_ser > 2) {
892 		printf("raid%d: Too many different serial numbers!\n",
893 		       raidPtr->raidid);
894 		fatal_error = 1;
895 	}
896 
897 	if (num_mod > 2) {
898 		printf("raid%d: Too many different mod counters!\n",
899 		       raidPtr->raidid);
900 		fatal_error = 1;
901 	}
902 
903 	/* we start by assuming the parity will be good, and flee from
904 	   that notion at the slightest sign of trouble */
905 
906 	parity_good = RF_RAID_CLEAN;
907 
908 	for (c = 0; c < raidPtr->numCol; c++) {
909 		dev_name = &cfgPtr->devnames[0][c][0];
910 		ci_label = raidget_component_label(raidPtr, c);
911 
912 		if (c == hosed_column) {
913 			printf("raid%d: Ignoring %s\n",
914 			       raidPtr->raidid, dev_name);
915 		} else {
916 			rf_print_label_status( raidPtr, c, dev_name, ci_label);
917 			if (rf_check_label_vitals( raidPtr, 0, c,
918 						   dev_name, ci_label,
919 						   serial_number,
920 						   mod_number )) {
921 				fatal_error = 1;
922 			}
923 			if (ci_label->clean != RF_RAID_CLEAN) {
924 				parity_good = RF_RAID_DIRTY;
925 			}
926 		}
927 	}
928 
929 	if (fatal_error) {
930 		parity_good = RF_RAID_DIRTY;
931 	}
932 
933 	/* we note the state of the parity */
934 	raidPtr->parity_good = parity_good;
935 
936 	return(fatal_error);
937 }
938 
939 int
940 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
941 {
942 	RF_RaidDisk_t *disks;
943 	RF_DiskQueue_t *spareQueues;
944 	int ret;
945 	unsigned int bs;
946 	int spare_number;
947 
948 	ret=0;
949 
950 	if (raidPtr->numSpare >= RF_MAXSPARE) {
951 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
952 		return(EINVAL);
953 	}
954 
955 	RF_LOCK_MUTEX(raidPtr->mutex);
956 	while (raidPtr->adding_hot_spare==1) {
957 		ltsleep(&(raidPtr->adding_hot_spare), PRIBIO, "raidhs", 0,
958 			&(raidPtr->mutex));
959 	}
960 	raidPtr->adding_hot_spare=1;
961 	RF_UNLOCK_MUTEX(raidPtr->mutex);
962 
963 	/* the beginning of the spares... */
964 	disks = &raidPtr->Disks[raidPtr->numCol];
965 
966 	spare_number = raidPtr->numSpare;
967 
968 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
969 			       &disks[spare_number],
970 			       raidPtr->numCol + spare_number);
971 
972 	if (ret)
973 		goto fail;
974 	if (disks[spare_number].status != rf_ds_optimal) {
975 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
976 			     sparePtr->component_name);
977 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
978 		ret=EINVAL;
979 		goto fail;
980 	} else {
981 		disks[spare_number].status = rf_ds_spare;
982 		DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
983 			 spare_number,
984 			 disks[spare_number].devname,
985 			 disks[spare_number].numBlocks,
986 			 disks[spare_number].blockSize,
987 			 (long int) disks[spare_number].numBlocks *
988 			 disks[spare_number].blockSize / 1024 / 1024);
989 	}
990 
991 
992 	/* check sizes and block sizes on the spare disk */
993 	bs = 1 << raidPtr->logBytesPerSector;
994 	if (disks[spare_number].blockSize != bs) {
995 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
996 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
997 		ret = EINVAL;
998 		goto fail;
999 	}
1000 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1001 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
1002 			     disks[spare_number].devname,
1003 			     disks[spare_number].blockSize,
1004 			     raidPtr->sectorsPerDisk);
1005 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
1006 		ret = EINVAL;
1007 		goto fail;
1008 	} else {
1009 		if (disks[spare_number].numBlocks >
1010 		    raidPtr->sectorsPerDisk) {
1011 			RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
1012 			    disks[spare_number].devname,
1013 			    raidPtr->sectorsPerDisk,
1014 			    disks[spare_number].numBlocks);
1015 
1016 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1017 		}
1018 	}
1019 
1020 	spareQueues = &raidPtr->Queues[raidPtr->numCol];
1021 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1022 				 raidPtr->numCol + spare_number,
1023 				 raidPtr->qType,
1024 				 raidPtr->sectorsPerDisk,
1025 				 raidPtr->Disks[raidPtr->numCol +
1026 						  spare_number].dev,
1027 				 raidPtr->maxOutstanding,
1028 				 &raidPtr->shutdownList,
1029 				 raidPtr->cleanupList);
1030 
1031 	RF_LOCK_MUTEX(raidPtr->mutex);
1032 	raidPtr->numSpare++;
1033 	RF_UNLOCK_MUTEX(raidPtr->mutex);
1034 
1035 fail:
1036 	RF_LOCK_MUTEX(raidPtr->mutex);
1037 	raidPtr->adding_hot_spare=0;
1038 	wakeup(&(raidPtr->adding_hot_spare));
1039 	RF_UNLOCK_MUTEX(raidPtr->mutex);
1040 
1041 	return(ret);
1042 }
1043 
1044 int
1045 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
1046 {
1047 	int spare_number;
1048 
1049 
1050 	if (raidPtr->numSpare==0) {
1051 		printf("No spares to remove!\n");
1052 		return(EINVAL);
1053 	}
1054 
1055 	spare_number = sparePtr->column;
1056 
1057 	return(EINVAL); /* XXX not implemented yet */
1058 #if 0
1059 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1060 		return(EINVAL);
1061 	}
1062 
1063 	/* verify that this spare isn't in use... */
1064 
1065 
1066 
1067 
1068 	/* it's gone.. */
1069 
1070 	raidPtr->numSpare--;
1071 
1072 	return(0);
1073 #endif
1074 }
1075 
1076 
1077 int
1078 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
1079 {
1080 	RF_RaidDisk_t *disks;
1081 
1082 	if ((component->column < 0) ||
1083 	    (component->column >= raidPtr->numCol)) {
1084 		return(EINVAL);
1085 	}
1086 
1087 	disks = &raidPtr->Disks[component->column];
1088 
1089 	/* 1. This component must be marked as 'failed' */
1090 
1091 	return(EINVAL); /* Not implemented yet. */
1092 }
1093 
1094 int
1095 rf_incorporate_hot_spare(RF_Raid_t *raidPtr,
1096     RF_SingleComponent_t *component)
1097 {
1098 
1099 	/* Issues here include how to 'move' this in if there is IO
1100 	   taking place (e.g. component queues and such) */
1101 
1102 	return(EINVAL); /* Not implemented yet. */
1103 }
1104