xref: /netbsd-src/sys/dev/raidframe/rf_disks.c (revision 817cd315412cb2ce0bc4e6bf3f12a866fd948396)
1 /*	$NetBSD: rf_disks.c,v 1.72 2009/11/17 18:54:26 jld Exp $	*/
2 /*-
3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to The NetBSD Foundation
7  * by Greg Oster
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 /*
32  * Copyright (c) 1995 Carnegie-Mellon University.
33  * All rights reserved.
34  *
35  * Author: Mark Holland
36  *
37  * Permission to use, copy, modify and distribute this software and
38  * its documentation is hereby granted, provided that both the copyright
39  * notice and this permission notice appear in all copies of the
40  * software, derivative works or modified versions, and any portions
41  * thereof, and that both notices appear in supporting documentation.
42  *
43  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
44  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
45  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46  *
47  * Carnegie Mellon requests users of this software to return to
48  *
49  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
50  *  School of Computer Science
51  *  Carnegie Mellon University
52  *  Pittsburgh PA 15213-3890
53  *
54  * any improvements or extensions that they make and grant Carnegie the
55  * rights to redistribute these changes.
56  */
57 
58 /***************************************************************
59  * rf_disks.c -- code to perform operations on the actual disks
60  ***************************************************************/
61 
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.72 2009/11/17 18:54:26 jld Exp $");
64 
65 #include <dev/raidframe/raidframevar.h>
66 
67 #include "rf_raid.h"
68 #include "rf_alloclist.h"
69 #include "rf_utils.h"
70 #include "rf_general.h"
71 #include "rf_options.h"
72 #include "rf_kintf.h"
73 #include "rf_netbsd.h"
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/proc.h>
78 #include <sys/ioctl.h>
79 #include <sys/fcntl.h>
80 #include <sys/vnode.h>
81 #include <sys/kauth.h>
82 
83 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
84 static void rf_print_label_status( RF_Raid_t *, int, char *,
85 				  RF_ComponentLabel_t *);
86 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
87 				  RF_ComponentLabel_t *, int, int );
88 
/*
 * Debug printf wrappers: print only when rf_diskDebug is non-zero.
 * The do { } while (0) wrapper makes each macro behave as a single
 * statement, so it can be used as the body of an if/else without
 * capturing the caller's else.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
91 
92 /**************************************************************************
93  *
94  * initialize the disks comprising the array
95  *
96  * We want the spare disks to have regular row,col numbers so that we can
97  * easily substitute a spare for a failed disk.  But, the driver code assumes
98  * throughout that the array contains numRow by numCol _non-spare_ disks, so
99  * it's not clear how to fit in the spares.  This is an unfortunate holdover
100  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
101  * rest, and put all the spares in it.  This probably needs to get changed
102  * eventually.
103  *
104  **************************************************************************/
105 
106 int
107 rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
108 		  RF_Config_t *cfgPtr)
109 {
110 	RF_RaidDisk_t *disks;
111 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
112 	RF_RowCol_t c;
113 	int bs, ret;
114 	unsigned i, count, foundone = 0, numFailuresThisRow;
115 	int force;
116 
117 	force = cfgPtr->force;
118 
119 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
120 	if (ret)
121 		goto fail;
122 
123 	disks = raidPtr->Disks;
124 
125 	numFailuresThisRow = 0;
126 	for (c = 0; c < raidPtr->numCol; c++) {
127 		ret = rf_ConfigureDisk(raidPtr,
128 				       &cfgPtr->devnames[0][c][0],
129 				       &disks[c], c);
130 
131 		if (ret)
132 			goto fail;
133 
134 		if (disks[c].status == rf_ds_optimal) {
135 			raidfetch_component_label(raidPtr, c);
136 		}
137 
138 		if (disks[c].status != rf_ds_optimal) {
139 			numFailuresThisRow++;
140 		} else {
141 			if (disks[c].numBlocks < min_numblks)
142 				min_numblks = disks[c].numBlocks;
143 			DPRINTF6("Disk at col %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
144 				 c, disks[c].devname,
145 				 disks[c].numBlocks,
146 				 disks[c].blockSize,
147 				 (long int) disks[c].numBlocks *
148 				 disks[c].blockSize / 1024 / 1024);
149 		}
150 	}
151 	/* XXX fix for n-fault tolerant */
152 	/* XXX this should probably check to see how many failures
153 	   we can handle for this configuration! */
154 	if (numFailuresThisRow > 0)
155 		raidPtr->status = rf_rs_degraded;
156 
157 	/* all disks must be the same size & have the same block size, bs must
158 	 * be a power of 2 */
159 	bs = 0;
160 	foundone = 0;
161 	for (c = 0; c < raidPtr->numCol; c++) {
162 		if (disks[c].status == rf_ds_optimal) {
163 			bs = disks[c].blockSize;
164 			foundone = 1;
165 			break;
166 		}
167 	}
168 	if (!foundone) {
169 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
170 		ret = EINVAL;
171 		goto fail;
172 	}
173 	for (count = 0, i = 1; i; i <<= 1)
174 		if (bs & i)
175 			count++;
176 	if (count != 1) {
177 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
178 		ret = EINVAL;
179 		goto fail;
180 	}
181 
182 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
183 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
184 		if (force != 0) {
185 			printf("raid%d: Fatal errors being ignored.\n",
186 			       raidPtr->raidid);
187 		} else {
188 			ret = EINVAL;
189 			goto fail;
190 		}
191 	}
192 
193 	for (c = 0; c < raidPtr->numCol; c++) {
194 		if (disks[c].status == rf_ds_optimal) {
195 			if (disks[c].blockSize != bs) {
196 				RF_ERRORMSG1("Error: block size of disk at c %d different from disk at c 0\n", c);
197 				ret = EINVAL;
198 				goto fail;
199 			}
200 			if (disks[c].numBlocks != min_numblks) {
201 				RF_ERRORMSG2("WARNING: truncating disk at c %d to %d blocks\n",
202 					     c, (int) min_numblks);
203 				disks[c].numBlocks = min_numblks;
204 			}
205 		}
206 	}
207 
208 	raidPtr->sectorsPerDisk = min_numblks;
209 	raidPtr->logBytesPerSector = ffs(bs) - 1;
210 	raidPtr->bytesPerSector = bs;
211 	raidPtr->sectorMask = bs - 1;
212 	return (0);
213 
214 fail:
215 
216 	rf_UnconfigureVnodes( raidPtr );
217 
218 	return (ret);
219 }
220 
221 
222 /****************************************************************************
223  * set up the data structures describing the spare disks in the array
224  * recall from the above comment that the spare disk descriptors are stored
225  * in row zero, which is specially expanded to hold them.
226  ****************************************************************************/
227 int
228 rf_ConfigureSpareDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
229 		       RF_Config_t *cfgPtr)
230 {
231 	int     i, ret;
232 	unsigned int bs;
233 	RF_RaidDisk_t *disks;
234 	int     num_spares_done;
235 
236 	num_spares_done = 0;
237 
238 	/* The space for the spares should have already been allocated by
239 	 * ConfigureDisks() */
240 
241 	disks = &raidPtr->Disks[raidPtr->numCol];
242 	for (i = 0; i < raidPtr->numSpare; i++) {
243 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
244 				       &disks[i], raidPtr->numCol + i);
245 		if (ret)
246 			goto fail;
247 		if (disks[i].status != rf_ds_optimal) {
248 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
249 				     &cfgPtr->spare_names[i][0]);
250 		} else {
251 			disks[i].status = rf_ds_spare;	/* change status to
252 							 * spare */
253 			DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n", i,
254 			    disks[i].devname,
255 			    disks[i].numBlocks, disks[i].blockSize,
256 			    (long int) disks[i].numBlocks *
257 				 disks[i].blockSize / 1024 / 1024);
258 		}
259 		num_spares_done++;
260 	}
261 
262 	/* check sizes and block sizes on spare disks */
263 	bs = 1 << raidPtr->logBytesPerSector;
264 	for (i = 0; i < raidPtr->numSpare; i++) {
265 		if (disks[i].blockSize != bs) {
266 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
267 			ret = EINVAL;
268 			goto fail;
269 		}
270 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
271 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
272 				     disks[i].devname, disks[i].blockSize,
273 				     raidPtr->sectorsPerDisk);
274 			ret = EINVAL;
275 			goto fail;
276 		} else
277 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
278 				RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
279 				    disks[i].devname,
280 				    raidPtr->sectorsPerDisk,
281 				    disks[i].numBlocks);
282 
283 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
284 			}
285 	}
286 
287 	return (0);
288 
289 fail:
290 
291 	/* Release the hold on the main components.  We've failed to allocate
292 	 * a spare, and since we're failing, we need to free things..
293 
294 	 XXX failing to allocate a spare is *not* that big of a deal...
295 	 We *can* survive without it, if need be, esp. if we get hot
296 	 adding working.
297 
298 	 If we don't fail out here, then we need a way to remove this spare...
299 	 that should be easier to do here than if we are "live"...
300 
301 	 */
302 
303 	rf_UnconfigureVnodes( raidPtr );
304 
305 	return (ret);
306 }
307 
308 static int
309 rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
310 {
311 	int ret;
312 
313 	/* We allocate RF_MAXSPARE on the first row so that we
314 	   have room to do hot-swapping of spares */
315 	RF_MallocAndAdd(raidPtr->Disks, (raidPtr->numCol + RF_MAXSPARE) *
316 			sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
317 			raidPtr->cleanupList);
318 	if (raidPtr->Disks == NULL) {
319 		ret = ENOMEM;
320 		goto fail;
321 	}
322 
323 	/* get space for device specific stuff.. */
324 	RF_MallocAndAdd(raidPtr->raid_cinfo,
325 			(raidPtr->numCol + RF_MAXSPARE) *
326 			sizeof(struct raidcinfo), (struct raidcinfo *),
327 			raidPtr->cleanupList);
328 
329 	if (raidPtr->raid_cinfo == NULL) {
330 		ret = ENOMEM;
331 		goto fail;
332 	}
333 
334 	return(0);
335 fail:
336 	rf_UnconfigureVnodes( raidPtr );
337 
338 	return(ret);
339 }
340 
341 
342 /* configure all of the disks (columns) of a RAID set during auto-configuration at boot */
343 int
344 rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
345 		      RF_AutoConfig_t *auto_config)
346 {
347 	RF_RaidDisk_t *disks;
348 	RF_RaidDisk_t *diskPtr;
349 	RF_RowCol_t c;
350 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
351 	int bs, ret;
352 	int numFailuresThisRow;
353 	RF_AutoConfig_t *ac;
354 	int parity_good;
355 	int mod_counter;
356 	int mod_counter_found;
357 
358 #if DEBUG
359 	printf("Starting autoconfiguration of RAID set...\n");
360 #endif
361 
362 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
363 	if (ret)
364 		goto fail;
365 
366 	disks = raidPtr->Disks;
367 
368 	/* assume the parity will be fine.. */
369 	parity_good = RF_RAID_CLEAN;
370 
371 	/* Check for mod_counters that are too low */
372 	mod_counter_found = 0;
373 	mod_counter = 0;
374 	ac = auto_config;
375 	while(ac!=NULL) {
376 		if (mod_counter_found==0) {
377 			mod_counter = ac->clabel->mod_counter;
378 			mod_counter_found = 1;
379 		} else {
380 			if (ac->clabel->mod_counter > mod_counter) {
381 				mod_counter = ac->clabel->mod_counter;
382 			}
383 		}
384 		ac->flag = 0; /* clear the general purpose flag */
385 		ac = ac->next;
386 	}
387 
388 	bs = 0;
389 
390 	numFailuresThisRow = 0;
391 	for (c = 0; c < raidPtr->numCol; c++) {
392 		diskPtr = &disks[c];
393 
394 		/* find this row/col in the autoconfig */
395 #if DEBUG
396 		printf("Looking for %d in autoconfig\n",c);
397 #endif
398 		ac = auto_config;
399 		while(ac!=NULL) {
400 			if (ac->clabel==NULL) {
401 				/* big-time bad news. */
402 				goto fail;
403 			}
404 			if ((ac->clabel->column == c) &&
405 			    (ac->clabel->mod_counter == mod_counter)) {
406 				/* it's this one... */
407 				/* flag it as 'used', so we don't
408 				   free it later. */
409 				ac->flag = 1;
410 #if DEBUG
411 				printf("Found: %s at %d\n",
412 				       ac->devname,c);
413 #endif
414 
415 				break;
416 			}
417 			ac=ac->next;
418 		}
419 
420 		if (ac==NULL) {
421 			/* we didn't find an exact match with a
422 			   correct mod_counter above... can we find
423 			   one with an incorrect mod_counter to use
424 			   instead?  (this one, if we find it, will be
425 			   marked as failed once the set configures)
426 			*/
427 
428 			ac = auto_config;
429 			while(ac!=NULL) {
430 				if (ac->clabel==NULL) {
431 					/* big-time bad news. */
432 					goto fail;
433 				}
434 				if (ac->clabel->column == c) {
435 					/* it's this one...
436 					   flag it as 'used', so we
437 					   don't free it later. */
438 					ac->flag = 1;
439 #if DEBUG
440 					printf("Found(low mod_counter): %s at %d\n",
441 					       ac->devname,c);
442 #endif
443 
444 					break;
445 				}
446 				ac=ac->next;
447 			}
448 		}
449 
450 
451 
452 		if (ac!=NULL) {
453 			/* Found it.  Configure it.. */
454 			diskPtr->blockSize = ac->clabel->blockSize;
455 			diskPtr->numBlocks = ac->clabel->numBlocks;
456 			/* Note: rf_protectedSectors is already
457 			   factored into numBlocks here */
458 			raidPtr->raid_cinfo[c].ci_vp = ac->vp;
459 			raidPtr->raid_cinfo[c].ci_dev = ac->dev;
460 
461 			memcpy(raidget_component_label(raidPtr, c),
462 			    ac->clabel, sizeof(*ac->clabel));
463 			snprintf(diskPtr->devname, sizeof(diskPtr->devname),
464 			    "/dev/%s", ac->devname);
465 
466 			/* note the fact that this component was
467 			   autoconfigured.  You'll need this info
468 			   later.  Trust me :) */
469 			diskPtr->auto_configured = 1;
470 			diskPtr->dev = ac->dev;
471 
472 			/*
473 			 * we allow the user to specify that
474 			 * only a fraction of the disks should
475 			 * be used this is just for debug: it
476 			 * speeds up the parity scan
477 			 */
478 
479 			diskPtr->numBlocks = diskPtr->numBlocks *
480 				rf_sizePercentage / 100;
481 
482 			/* XXX these will get set multiple times,
483 			   but since we're autoconfiguring, they'd
484 			   better be always the same each time!
485 			   If not, this is the least of your worries */
486 
487 			bs = diskPtr->blockSize;
488 			min_numblks = diskPtr->numBlocks;
489 
490 			/* this gets done multiple times, but that's
491 			   fine -- the serial number will be the same
492 			   for all components, guaranteed */
493 			raidPtr->serial_number = ac->clabel->serial_number;
494 			/* check the last time the label was modified */
495 
496 			if (ac->clabel->mod_counter != mod_counter) {
497 				/* Even though we've filled in all of
498 				   the above, we don't trust this
499 				   component since it's modification
500 				   counter is not in sync with the
501 				   rest, and we really consider it to
502 				   be failed.  */
503 				disks[c].status = rf_ds_failed;
504 				numFailuresThisRow++;
505 			} else {
506 				if (ac->clabel->clean != RF_RAID_CLEAN) {
507 					parity_good = RF_RAID_DIRTY;
508 				}
509 			}
510 		} else {
511 			/* Didn't find it at all!!  Component must
512 			   really be dead */
513 			disks[c].status = rf_ds_failed;
514 			snprintf(disks[c].devname, sizeof(disks[c].devname),
515 			    "component%d", c);
516 			numFailuresThisRow++;
517 		}
518 	}
519 	/* XXX fix for n-fault tolerant */
520 	/* XXX this should probably check to see how many failures
521 	   we can handle for this configuration! */
522 	if (numFailuresThisRow > 0) {
523 		raidPtr->status = rf_rs_degraded;
524 		raidPtr->numFailures = numFailuresThisRow;
525 	}
526 
527 	/* close the device for the ones that didn't get used */
528 
529 	ac = auto_config;
530 	while(ac!=NULL) {
531 		if (ac->flag == 0) {
532 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
533 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
534 			vput(ac->vp);
535 			ac->vp = NULL;
536 #if DEBUG
537 			printf("Released %s from auto-config set.\n",
538 			       ac->devname);
539 #endif
540 		}
541 		ac = ac->next;
542 	}
543 
544 	raidPtr->mod_counter = mod_counter;
545 
546 	/* note the state of the parity, if any */
547 	raidPtr->parity_good = parity_good;
548 	raidPtr->sectorsPerDisk = min_numblks;
549 	raidPtr->logBytesPerSector = ffs(bs) - 1;
550 	raidPtr->bytesPerSector = bs;
551 	raidPtr->sectorMask = bs - 1;
552 	return (0);
553 
554 fail:
555 
556 	rf_UnconfigureVnodes( raidPtr );
557 
558 	return (ret);
559 
560 }
561 
562 /* configure a single disk in the array */
563 int
564 rf_ConfigureDisk(RF_Raid_t *raidPtr, char *bf, RF_RaidDisk_t *diskPtr,
565 		 RF_RowCol_t col)
566 {
567 	char   *p;
568 	struct vnode *vp;
569 	struct vattr va;
570 	int     error;
571 
572 	p = rf_find_non_white(bf);
573 	if (p[strlen(p) - 1] == '\n') {
574 		/* strip off the newline */
575 		p[strlen(p) - 1] = '\0';
576 	}
577 	(void) strcpy(diskPtr->devname, p);
578 
579 	/* Let's start by claiming the component is fine and well... */
580 	diskPtr->status = rf_ds_optimal;
581 
582 	raidPtr->raid_cinfo[col].ci_vp = NULL;
583 	raidPtr->raid_cinfo[col].ci_dev = 0;
584 
585 	if (!strcmp("absent", diskPtr->devname)) {
586 		printf("Ignoring missing component at column %d\n", col);
587 		sprintf(diskPtr->devname, "component%d", col);
588 		diskPtr->status = rf_ds_failed;
589 		return (0);
590 	}
591 
592 	error = dk_lookup(diskPtr->devname, curlwp, &vp, UIO_SYSSPACE);
593 	if (error) {
594 		printf("dk_lookup on device: %s failed!\n", diskPtr->devname);
595 		if (error == ENXIO) {
596 			/* the component isn't there... must be dead :-( */
597 			diskPtr->status = rf_ds_failed;
598 		} else {
599 			return (error);
600 		}
601 	}
602 	if (diskPtr->status == rf_ds_optimal) {
603 
604 		if ((error = VOP_GETATTR(vp, &va, curlwp->l_cred)) != 0)
605 			return (error);
606 		if ((error = rf_getdisksize(vp, curlwp, diskPtr)) != 0)
607 			return (error);
608 
609 		raidPtr->raid_cinfo[col].ci_vp = vp;
610 		raidPtr->raid_cinfo[col].ci_dev = va.va_rdev;
611 
612 		/* This component was not automatically configured */
613 		diskPtr->auto_configured = 0;
614 		diskPtr->dev = va.va_rdev;
615 
616 		/* we allow the user to specify that only a fraction of the
617 		 * disks should be used this is just for debug:  it speeds up
618 		 * the parity scan */
619 		diskPtr->numBlocks = diskPtr->numBlocks *
620 			rf_sizePercentage / 100;
621 	}
622 	return (0);
623 }
624 
625 static void
626 rf_print_label_status(RF_Raid_t *raidPtr, int column, char *dev_name,
627 		      RF_ComponentLabel_t *ci_label)
628 {
629 
630 	printf("raid%d: Component %s being configured at col: %d\n",
631 	       raidPtr->raidid, dev_name, column );
632 	printf("         Column: %d Num Columns: %d\n",
633 	       ci_label->column,
634 	       ci_label->num_columns);
635 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
636 	       ci_label->version, ci_label->serial_number,
637 	       ci_label->mod_counter);
638 	printf("         Clean: %s Status: %d\n",
639 	       ci_label->clean ? "Yes" : "No", ci_label->status );
640 }
641 
642 static int rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column,
643 				 char *dev_name, RF_ComponentLabel_t *ci_label,
644 				 int serial_number, int mod_counter)
645 {
646 	int fatal_error = 0;
647 
648 	if (serial_number != ci_label->serial_number) {
649 		printf("%s has a different serial number: %d %d\n",
650 		       dev_name, serial_number, ci_label->serial_number);
651 		fatal_error = 1;
652 	}
653 	if (mod_counter != ci_label->mod_counter) {
654 		printf("%s has a different modification count: %d %d\n",
655 		       dev_name, mod_counter, ci_label->mod_counter);
656 	}
657 
658 	if (row != ci_label->row) {
659 		printf("Row out of alignment for: %s\n", dev_name);
660 		fatal_error = 1;
661 	}
662 	if (column != ci_label->column) {
663 		printf("Column out of alignment for: %s\n", dev_name);
664 		fatal_error = 1;
665 	}
666 	if (raidPtr->numCol != ci_label->num_columns) {
667 		printf("Number of columns do not match for: %s\n", dev_name);
668 		fatal_error = 1;
669 	}
670 	if (ci_label->clean == 0) {
671 		/* it's not clean, but that's not fatal */
672 		printf("%s is not clean!\n", dev_name);
673 	}
674 	return(fatal_error);
675 }
676 
677 
678 /*
679 
680    rf_CheckLabels() - check all the component labels for consistency.
681    Return an error if there is anything major amiss.
682 
683  */
684 
685 int
686 rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
687 {
688 	int c;
689 	char *dev_name;
690 	RF_ComponentLabel_t *ci_label;
691 	int serial_number = 0;
692 	int mod_number = 0;
693 	int fatal_error = 0;
694 	int mod_values[4];
695 	int mod_count[4];
696 	int ser_values[4];
697 	int ser_count[4];
698 	int num_ser;
699 	int num_mod;
700 	int i;
701 	int found;
702 	int hosed_column;
703 	int too_fatal;
704 	int parity_good;
705 	int force;
706 
707 	hosed_column = -1;
708 	too_fatal = 0;
709 	force = cfgPtr->force;
710 
711 	/*
712 	   We're going to try to be a little intelligent here.  If one
713 	   component's label is bogus, and we can identify that it's the
714 	   *only* one that's gone, we'll mark it as "failed" and allow
715 	   the configuration to proceed.  This will be the *only* case
716 	   that we'll proceed if there would be (otherwise) fatal errors.
717 
718 	   Basically we simply keep a count of how many components had
719 	   what serial number.  If all but one agree, we simply mark
720 	   the disagreeing component as being failed, and allow
721 	   things to come up "normally".
722 
723 	   We do this first for serial numbers, and then for "mod_counter".
724 
725 	 */
726 
727 	num_ser = 0;
728 	num_mod = 0;
729 
730 	for (c = 0; c < raidPtr->numCol; c++) {
731 		ci_label = raidget_component_label(raidPtr, c);
732 		found=0;
733 		for(i=0;i<num_ser;i++) {
734 			if (ser_values[i] == ci_label->serial_number) {
735 				ser_count[i]++;
736 				found=1;
737 				break;
738 			}
739 		}
740 		if (!found) {
741 			ser_values[num_ser] = ci_label->serial_number;
742 			ser_count[num_ser] = 1;
743 			num_ser++;
744 			if (num_ser>2) {
745 				fatal_error = 1;
746 				break;
747 			}
748 		}
749 		found=0;
750 		for(i=0;i<num_mod;i++) {
751 			if (mod_values[i] == ci_label->mod_counter) {
752 				mod_count[i]++;
753 				found=1;
754 				break;
755 			}
756 		}
757 		if (!found) {
758 			mod_values[num_mod] = ci_label->mod_counter;
759 			mod_count[num_mod] = 1;
760 			num_mod++;
761 			if (num_mod>2) {
762 				fatal_error = 1;
763 				break;
764 			}
765 		}
766 	}
767 #if DEBUG
768 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
769 	for(i=0;i<num_ser;i++) {
770 		printf("%d %d\n", ser_values[i], ser_count[i]);
771 	}
772 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
773 	for(i=0;i<num_mod;i++) {
774 		printf("%d %d\n", mod_values[i], mod_count[i]);
775 	}
776 #endif
777 	serial_number = ser_values[0];
778 	if (num_ser == 2) {
779 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
780 			/* Locate the maverick component */
781 			if (ser_count[1] > ser_count[0]) {
782 				serial_number = ser_values[1];
783 			}
784 
785 			for (c = 0; c < raidPtr->numCol; c++) {
786 				ci_label = raidget_component_label(raidPtr, c);
787 				if (serial_number != ci_label->serial_number) {
788 					hosed_column = c;
789 					break;
790 				}
791 			}
792 			printf("Hosed component: %s\n",
793 			       &cfgPtr->devnames[0][hosed_column][0]);
794 			if (!force) {
795 				/* we'll fail this component, as if there are
796 				   other major errors, we arn't forcing things
797 				   and we'll abort the config anyways */
798 				raidPtr->Disks[hosed_column].status
799 					= rf_ds_failed;
800 				raidPtr->numFailures++;
801 				raidPtr->status = rf_rs_degraded;
802 			}
803 		} else {
804 			too_fatal = 1;
805 		}
806 		if (cfgPtr->parityConfig == '0') {
807 			/* We've identified two different serial numbers.
808 			   RAID 0 can't cope with that, so we'll punt */
809 			too_fatal = 1;
810 		}
811 
812 	}
813 
814 	/* record the serial number for later.  If we bail later, setting
815 	   this doesn't matter, otherwise we've got the best guess at the
816 	   correct serial number */
817 	raidPtr->serial_number = serial_number;
818 
819 	mod_number = mod_values[0];
820 	if (num_mod == 2) {
821 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
822 			/* Locate the maverick component */
823 			if (mod_count[1] > mod_count[0]) {
824 				mod_number = mod_values[1];
825 			} else if (mod_count[1] < mod_count[0]) {
826 				mod_number = mod_values[0];
827 			} else {
828 				/* counts of different modification values
829 				   are the same.   Assume greater value is
830 				   the correct one, all other things
831 				   considered */
832 				if (mod_values[0] > mod_values[1]) {
833 					mod_number = mod_values[0];
834 				} else {
835 					mod_number = mod_values[1];
836 				}
837 
838 			}
839 
840 			for (c = 0; c < raidPtr->numCol; c++) {
841 				ci_label = raidget_component_label(raidPtr, c);
842 				if (mod_number != ci_label->mod_counter) {
843 					if (hosed_column == c) {
844 						/* same one.  Can
845 						   deal with it.  */
846 					} else {
847 						hosed_column = c;
848 						if (num_ser != 1) {
849 							too_fatal = 1;
850 							break;
851 						}
852 					}
853 				}
854 			}
855 			printf("Hosed component: %s\n",
856 			       &cfgPtr->devnames[0][hosed_column][0]);
857 			if (!force) {
858 				/* we'll fail this component, as if there are
859 				   other major errors, we arn't forcing things
860 				   and we'll abort the config anyways */
861 				if (raidPtr->Disks[hosed_column].status != rf_ds_failed) {
862 					raidPtr->Disks[hosed_column].status
863 						= rf_ds_failed;
864 					raidPtr->numFailures++;
865 					raidPtr->status = rf_rs_degraded;
866 				}
867 			}
868 		} else {
869 			too_fatal = 1;
870 		}
871 		if (cfgPtr->parityConfig == '0') {
872 			/* We've identified two different mod counters.
873 			   RAID 0 can't cope with that, so we'll punt */
874 			too_fatal = 1;
875 		}
876 	}
877 
878 	raidPtr->mod_counter = mod_number;
879 
880 	if (too_fatal) {
881 		/* we've had both a serial number mismatch, and a mod_counter
882 		   mismatch -- and they involved two different components!!
883 		   Bail -- make things fail so that the user must force
884 		   the issue... */
885 		hosed_column = -1;
886 		fatal_error = 1;
887 	}
888 
889 	if (num_ser > 2) {
890 		printf("raid%d: Too many different serial numbers!\n",
891 		       raidPtr->raidid);
892 		fatal_error = 1;
893 	}
894 
895 	if (num_mod > 2) {
896 		printf("raid%d: Too many different mod counters!\n",
897 		       raidPtr->raidid);
898 		fatal_error = 1;
899 	}
900 
901 	/* we start by assuming the parity will be good, and flee from
902 	   that notion at the slightest sign of trouble */
903 
904 	parity_good = RF_RAID_CLEAN;
905 
906 	for (c = 0; c < raidPtr->numCol; c++) {
907 		dev_name = &cfgPtr->devnames[0][c][0];
908 		ci_label = raidget_component_label(raidPtr, c);
909 
910 		if (c == hosed_column) {
911 			printf("raid%d: Ignoring %s\n",
912 			       raidPtr->raidid, dev_name);
913 		} else {
914 			rf_print_label_status( raidPtr, c, dev_name, ci_label);
915 			if (rf_check_label_vitals( raidPtr, 0, c,
916 						   dev_name, ci_label,
917 						   serial_number,
918 						   mod_number )) {
919 				fatal_error = 1;
920 			}
921 			if (ci_label->clean != RF_RAID_CLEAN) {
922 				parity_good = RF_RAID_DIRTY;
923 			}
924 		}
925 	}
926 
927 	if (fatal_error) {
928 		parity_good = RF_RAID_DIRTY;
929 	}
930 
931 	/* we note the state of the parity */
932 	raidPtr->parity_good = parity_good;
933 
934 	return(fatal_error);
935 }
936 
937 int
938 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
939 {
940 	RF_RaidDisk_t *disks;
941 	RF_DiskQueue_t *spareQueues;
942 	int ret;
943 	unsigned int bs;
944 	int spare_number;
945 
946 	ret=0;
947 
948 	if (raidPtr->numSpare >= RF_MAXSPARE) {
949 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
950 		return(EINVAL);
951 	}
952 
953 	RF_LOCK_MUTEX(raidPtr->mutex);
954 	while (raidPtr->adding_hot_spare==1) {
955 		ltsleep(&(raidPtr->adding_hot_spare), PRIBIO, "raidhs", 0,
956 			&(raidPtr->mutex));
957 	}
958 	raidPtr->adding_hot_spare=1;
959 	RF_UNLOCK_MUTEX(raidPtr->mutex);
960 
961 	/* the beginning of the spares... */
962 	disks = &raidPtr->Disks[raidPtr->numCol];
963 
964 	spare_number = raidPtr->numSpare;
965 
966 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
967 			       &disks[spare_number],
968 			       raidPtr->numCol + spare_number);
969 
970 	if (ret)
971 		goto fail;
972 	if (disks[spare_number].status != rf_ds_optimal) {
973 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
974 			     sparePtr->component_name);
975 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
976 		ret=EINVAL;
977 		goto fail;
978 	} else {
979 		disks[spare_number].status = rf_ds_spare;
980 		DPRINTF6("Spare Disk %d: dev %s numBlocks %" PRIu64 " blockSize %d (%ld MB)\n",
981 			 spare_number,
982 			 disks[spare_number].devname,
983 			 disks[spare_number].numBlocks,
984 			 disks[spare_number].blockSize,
985 			 (long int) disks[spare_number].numBlocks *
986 			 disks[spare_number].blockSize / 1024 / 1024);
987 	}
988 
989 
990 	/* check sizes and block sizes on the spare disk */
991 	bs = 1 << raidPtr->logBytesPerSector;
992 	if (disks[spare_number].blockSize != bs) {
993 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
994 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
995 		ret = EINVAL;
996 		goto fail;
997 	}
998 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
999 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %" PRIu64 " blocks)\n",
1000 			     disks[spare_number].devname,
1001 			     disks[spare_number].blockSize,
1002 			     raidPtr->sectorsPerDisk);
1003 		rf_close_component(raidPtr, raidPtr->raid_cinfo[raidPtr->numCol+spare_number].ci_vp, 0);
1004 		ret = EINVAL;
1005 		goto fail;
1006 	} else {
1007 		if (disks[spare_number].numBlocks >
1008 		    raidPtr->sectorsPerDisk) {
1009 			RF_ERRORMSG3("Warning: truncating spare disk %s to %" PRIu64 " blocks (from %" PRIu64 ")\n",
1010 			    disks[spare_number].devname,
1011 			    raidPtr->sectorsPerDisk,
1012 			    disks[spare_number].numBlocks);
1013 
1014 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1015 		}
1016 	}
1017 
1018 	spareQueues = &raidPtr->Queues[raidPtr->numCol];
1019 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1020 				 raidPtr->numCol + spare_number,
1021 				 raidPtr->qType,
1022 				 raidPtr->sectorsPerDisk,
1023 				 raidPtr->Disks[raidPtr->numCol +
1024 						  spare_number].dev,
1025 				 raidPtr->maxOutstanding,
1026 				 &raidPtr->shutdownList,
1027 				 raidPtr->cleanupList);
1028 
1029 	RF_LOCK_MUTEX(raidPtr->mutex);
1030 	raidPtr->numSpare++;
1031 	RF_UNLOCK_MUTEX(raidPtr->mutex);
1032 
1033 fail:
1034 	RF_LOCK_MUTEX(raidPtr->mutex);
1035 	raidPtr->adding_hot_spare=0;
1036 	wakeup(&(raidPtr->adding_hot_spare));
1037 	RF_UNLOCK_MUTEX(raidPtr->mutex);
1038 
1039 	return(ret);
1040 }
1041 
1042 int
1043 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
1044 {
1045 	int spare_number;
1046 
1047 
1048 	if (raidPtr->numSpare==0) {
1049 		printf("No spares to remove!\n");
1050 		return(EINVAL);
1051 	}
1052 
1053 	spare_number = sparePtr->column;
1054 
1055 	return(EINVAL); /* XXX not implemented yet */
1056 #if 0
1057 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1058 		return(EINVAL);
1059 	}
1060 
1061 	/* verify that this spare isn't in use... */
1062 
1063 
1064 
1065 
1066 	/* it's gone.. */
1067 
1068 	raidPtr->numSpare--;
1069 
1070 	return(0);
1071 #endif
1072 }
1073 
1074 
1075 int
1076 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
1077 {
1078 	RF_RaidDisk_t *disks;
1079 
1080 	if ((component->column < 0) ||
1081 	    (component->column >= raidPtr->numCol)) {
1082 		return(EINVAL);
1083 	}
1084 
1085 	disks = &raidPtr->Disks[component->column];
1086 
1087 	/* 1. This component must be marked as 'failed' */
1088 
1089 	return(EINVAL); /* Not implemented yet. */
1090 }
1091 
1092 int
1093 rf_incorporate_hot_spare(RF_Raid_t *raidPtr,
1094     RF_SingleComponent_t *component)
1095 {
1096 
1097 	/* Issues here include how to 'move' this in if there is IO
1098 	   taking place (e.g. component queues and such) */
1099 
1100 	return(EINVAL); /* Not implemented yet. */
1101 }
1102