xref: /netbsd-src/sys/dev/raidframe/rf_disks.c (revision e4d7c2e329d54c97e0c0bd3016bbe74f550c3d5e)
1 /*	$NetBSD: rf_disks.c,v 1.22 2000/03/03 03:47:17 oster Exp $	*/
2 /*-
3  * Copyright (c) 1999 The NetBSD Foundation, Inc.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to The NetBSD Foundation
7  * by Greg Oster
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *        This product includes software developed by the NetBSD
20  *        Foundation, Inc. and its contributors.
21  * 4. Neither the name of The NetBSD Foundation nor the names of its
22  *    contributors may be used to endorse or promote products derived
23  *    from this software without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 /*
39  * Copyright (c) 1995 Carnegie-Mellon University.
40  * All rights reserved.
41  *
42  * Author: Mark Holland
43  *
44  * Permission to use, copy, modify and distribute this software and
45  * its documentation is hereby granted, provided that both the copyright
46  * notice and this permission notice appear in all copies of the
47  * software, derivative works or modified versions, and any portions
48  * thereof, and that both notices appear in supporting documentation.
49  *
50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53  *
54  * Carnegie Mellon requests users of this software to return to
55  *
56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57  *  School of Computer Science
58  *  Carnegie Mellon University
59  *  Pittsburgh PA 15213-3890
60  *
61  * any improvements or extensions that they make and grant Carnegie the
62  * rights to redistribute these changes.
63  */
64 
65 /***************************************************************
66  * rf_disks.c -- code to perform operations on the actual disks
67  ***************************************************************/
68 
69 #include "rf_types.h"
70 #include "rf_raid.h"
71 #include "rf_alloclist.h"
72 #include "rf_utils.h"
73 #include "rf_configure.h"
74 #include "rf_general.h"
75 #include "rf_options.h"
76 #include "rf_kintf.h"
77 #include "rf_netbsd.h"
78 
79 #include <sys/types.h>
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/proc.h>
83 #include <sys/ioctl.h>
84 #include <sys/fcntl.h>
85 #include <sys/vnode.h>
86 
87 /* XXX these should be in a header file somewhere */
88 void rf_UnconfigureVnodes( RF_Raid_t * );
89 int rf_CheckLabels( RF_Raid_t *, RF_Config_t *);
90 static int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
91 
/*
 * Debug printf wrappers: the message is printed only when rf_diskDebug is
 * non-zero.  Each expansion is wrapped in do { } while (0) so that it is a
 * single statement; a bare "if" would capture an "else" that follows the
 * macro in an unbraced if/else.
 */
#define DPRINTF6(a,b,c,d,e,f) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f); } while (0)
#define DPRINTF7(a,b,c,d,e,f,g) \
	do { if (rf_diskDebug) printf(a,b,c,d,e,f,g); } while (0)
94 
95 /**************************************************************************
96  *
97  * initialize the disks comprising the array
98  *
99  * We want the spare disks to have regular row,col numbers so that we can
100  * easily substitue a spare for a failed disk.  But, the driver code assumes
101  * throughout that the array contains numRow by numCol _non-spare_ disks, so
102  * it's not clear how to fit in the spares.  This is an unfortunate holdover
103  * from raidSim.  The quick and dirty fix is to make row zero bigger than the
104  * rest, and put all the spares in it.  This probably needs to get changed
105  * eventually.
106  *
107  **************************************************************************/
108 
109 int
110 rf_ConfigureDisks( listp, raidPtr, cfgPtr )
111 	RF_ShutdownList_t **listp;
112 	RF_Raid_t *raidPtr;
113 	RF_Config_t *cfgPtr;
114 {
115 	RF_RaidDisk_t **disks;
116 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
117 	RF_RowCol_t r, c;
118 	int bs, ret;
119 	unsigned i, count, foundone = 0, numFailuresThisRow;
120 	int force;
121 
122 	force = cfgPtr->force;
123 
124 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
125 	if (ret)
126 		goto fail;
127 
128 	disks = raidPtr->Disks;
129 
130 	for (r = 0; r < raidPtr->numRow; r++) {
131 		numFailuresThisRow = 0;
132 		for (c = 0; c < raidPtr->numCol; c++) {
133 			ret = rf_ConfigureDisk(raidPtr,
134 					       &cfgPtr->devnames[r][c][0],
135 					       &disks[r][c], r, c);
136 
137 			if (ret)
138 				goto fail;
139 
140 			if (disks[r][c].status == rf_ds_optimal) {
141 				raidread_component_label(
142 					 raidPtr->raid_cinfo[r][c].ci_dev,
143 					 raidPtr->raid_cinfo[r][c].ci_vp,
144 					 &raidPtr->raid_cinfo[r][c].ci_label);
145 			}
146 
147 			if (disks[r][c].status != rf_ds_optimal) {
148 				numFailuresThisRow++;
149 			} else {
150 				if (disks[r][c].numBlocks < min_numblks)
151 					min_numblks = disks[r][c].numBlocks;
152 				DPRINTF7("Disk at row %d col %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n",
153 				    r, c, disks[r][c].devname,
154 				    (long int) disks[r][c].numBlocks,
155 				    disks[r][c].blockSize,
156 				    (long int) disks[r][c].numBlocks *
157 					 disks[r][c].blockSize / 1024 / 1024);
158 			}
159 		}
160 		/* XXX fix for n-fault tolerant */
161 		/* XXX this should probably check to see how many failures
162 		   we can handle for this configuration! */
163 		if (numFailuresThisRow > 0)
164 			raidPtr->status[r] = rf_rs_degraded;
165 	}
166 
167 	/* all disks must be the same size & have the same block size, bs must
168 	 * be a power of 2 */
169 	bs = 0;
170 	for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
171 		for (c = 0; !foundone && c < raidPtr->numCol; c++) {
172 			if (disks[r][c].status == rf_ds_optimal) {
173 				bs = disks[r][c].blockSize;
174 				foundone = 1;
175 			}
176 		}
177 	}
178 	if (!foundone) {
179 		RF_ERRORMSG("RAIDFRAME: Did not find any live disks in the array.\n");
180 		ret = EINVAL;
181 		goto fail;
182 	}
183 	for (count = 0, i = 1; i; i <<= 1)
184 		if (bs & i)
185 			count++;
186 	if (count != 1) {
187 		RF_ERRORMSG1("Error: block size on disks (%d) must be a power of 2\n", bs);
188 		ret = EINVAL;
189 		goto fail;
190 	}
191 
192 	if (rf_CheckLabels( raidPtr, cfgPtr )) {
193 		printf("raid%d: There were fatal errors\n", raidPtr->raidid);
194 		if (force != 0) {
195 			printf("raid%d: Fatal errors being ignored.\n",
196 			       raidPtr->raidid);
197 		} else {
198 			ret = EINVAL;
199 			goto fail;
200 		}
201 	}
202 
203 	for (r = 0; r < raidPtr->numRow; r++) {
204 		for (c = 0; c < raidPtr->numCol; c++) {
205 			if (disks[r][c].status == rf_ds_optimal) {
206 				if (disks[r][c].blockSize != bs) {
207 					RF_ERRORMSG2("Error: block size of disk at r %d c %d different from disk at r 0 c 0\n", r, c);
208 					ret = EINVAL;
209 					goto fail;
210 				}
211 				if (disks[r][c].numBlocks != min_numblks) {
212 					RF_ERRORMSG3("WARNING: truncating disk at r %d c %d to %d blocks\n",
213 					    r, c, (int) min_numblks);
214 					disks[r][c].numBlocks = min_numblks;
215 				}
216 			}
217 		}
218 	}
219 
220 	raidPtr->sectorsPerDisk = min_numblks;
221 	raidPtr->logBytesPerSector = ffs(bs) - 1;
222 	raidPtr->bytesPerSector = bs;
223 	raidPtr->sectorMask = bs - 1;
224 	return (0);
225 
226 fail:
227 
228 	rf_UnconfigureVnodes( raidPtr );
229 
230 	return (ret);
231 }
232 
233 
234 /****************************************************************************
235  * set up the data structures describing the spare disks in the array
236  * recall from the above comment that the spare disk descriptors are stored
237  * in row zero, which is specially expanded to hold them.
238  ****************************************************************************/
239 int
240 rf_ConfigureSpareDisks( listp, raidPtr, cfgPtr )
241 	RF_ShutdownList_t ** listp;
242 	RF_Raid_t * raidPtr;
243 	RF_Config_t * cfgPtr;
244 {
245 	int     i, ret;
246 	unsigned int bs;
247 	RF_RaidDisk_t *disks;
248 	int     num_spares_done;
249 
250 	num_spares_done = 0;
251 
252 	/* The space for the spares should have already been allocated by
253 	 * ConfigureDisks() */
254 
255 	disks = &raidPtr->Disks[0][raidPtr->numCol];
256 	for (i = 0; i < raidPtr->numSpare; i++) {
257 		ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
258 				       &disks[i], 0, raidPtr->numCol + i);
259 		if (ret)
260 			goto fail;
261 		if (disks[i].status != rf_ds_optimal) {
262 			RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
263 				     &cfgPtr->spare_names[i][0]);
264 		} else {
265 			disks[i].status = rf_ds_spare;	/* change status to
266 							 * spare */
267 			DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", i,
268 			    disks[i].devname,
269 			    (long int) disks[i].numBlocks, disks[i].blockSize,
270 			    (long int) disks[i].numBlocks *
271 				 disks[i].blockSize / 1024 / 1024);
272 		}
273 		num_spares_done++;
274 	}
275 
276 	/* check sizes and block sizes on spare disks */
277 	bs = 1 << raidPtr->logBytesPerSector;
278 	for (i = 0; i < raidPtr->numSpare; i++) {
279 		if (disks[i].blockSize != bs) {
280 			RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[i].blockSize, disks[i].devname, bs);
281 			ret = EINVAL;
282 			goto fail;
283 		}
284 		if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
285 			RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
286 				     disks[i].devname, disks[i].blockSize,
287 				     (long int) raidPtr->sectorsPerDisk);
288 			ret = EINVAL;
289 			goto fail;
290 		} else
291 			if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
292 				RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[i].devname, (long int) raidPtr->sectorsPerDisk);
293 
294 				disks[i].numBlocks = raidPtr->sectorsPerDisk;
295 			}
296 	}
297 
298 	return (0);
299 
300 fail:
301 
302 	/* Release the hold on the main components.  We've failed to allocate
303 	 * a spare, and since we're failing, we need to free things..
304 
305 	 XXX failing to allocate a spare is *not* that big of a deal...
306 	 We *can* survive without it, if need be, esp. if we get hot
307 	 adding working.
308 
309 	 If we don't fail out here, then we need a way to remove this spare...
310 	 that should be easier to do here than if we are "live"...
311 
312 	 */
313 
314 	rf_UnconfigureVnodes( raidPtr );
315 
316 	return (ret);
317 }
318 
319 static int
320 rf_AllocDiskStructures(raidPtr, cfgPtr)
321 	RF_Raid_t *raidPtr;
322  	RF_Config_t *cfgPtr;
323 {
324 	RF_RaidDisk_t **disks;
325 	int ret;
326 	int r;
327 
328 	RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
329 			(RF_RaidDisk_t **), raidPtr->cleanupList);
330 	if (disks == NULL) {
331 		ret = ENOMEM;
332 		goto fail;
333 	}
334 	raidPtr->Disks = disks;
335 	/* get space for the device-specific stuff... */
336 	RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
337 	    sizeof(struct raidcinfo *), (struct raidcinfo **),
338 	    raidPtr->cleanupList);
339 	if (raidPtr->raid_cinfo == NULL) {
340 		ret = ENOMEM;
341 		goto fail;
342 	}
343 
344 	for (r = 0; r < raidPtr->numRow; r++) {
345 		/* We allocate RF_MAXSPARE on the first row so that we
346 		   have room to do hot-swapping of spares */
347 		RF_CallocAndAdd(disks[r], raidPtr->numCol
348 				+ ((r == 0) ? RF_MAXSPARE : 0),
349 				sizeof(RF_RaidDisk_t), (RF_RaidDisk_t *),
350 				raidPtr->cleanupList);
351 		if (disks[r] == NULL) {
352 			ret = ENOMEM;
353 			goto fail;
354 		}
355 		/* get more space for device specific stuff.. */
356 		RF_CallocAndAdd(raidPtr->raid_cinfo[r],
357 		    raidPtr->numCol + ((r == 0) ? raidPtr->numSpare : 0),
358 		    sizeof(struct raidcinfo), (struct raidcinfo *),
359 		    raidPtr->cleanupList);
360 		if (raidPtr->raid_cinfo[r] == NULL) {
361 			ret = ENOMEM;
362 			goto fail;
363 		}
364 	}
365 	return(0);
366 fail:
367 	rf_UnconfigureVnodes( raidPtr );
368 
369 	return(ret);
370 }
371 
372 
373 /* configure a single disk during auto-configuration at boot */
374 int
375 rf_AutoConfigureDisks(raidPtr, cfgPtr, auto_config)
376 	RF_Raid_t *raidPtr;
377 	RF_Config_t *cfgPtr;
378 	RF_AutoConfig_t *auto_config;
379 {
380 	RF_RaidDisk_t **disks;
381 	RF_RaidDisk_t *diskPtr;
382 	RF_RowCol_t r, c;
383 	RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
384 	int bs, ret;
385 	int numFailuresThisRow;
386 	int force;
387 	RF_AutoConfig_t *ac;
388 	int parity_good;
389 	int mod_counter;
390 
391 #if DEBUG
392 	printf("Starting autoconfiguration of RAID set...\n");
393 #endif
394 	force = cfgPtr->force;
395 
396 	ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
397 	if (ret)
398 		goto fail;
399 
400 	disks = raidPtr->Disks;
401 
402 	/* assume the parity will be fine.. */
403 	parity_good = RF_RAID_CLEAN;
404 
405 	/* Check for mod_counters that are too low */
406 	mod_counter = -1;
407 	ac = auto_config;
408 	while(ac!=NULL) {
409 		if (ac->clabel->mod_counter > mod_counter) {
410 			mod_counter = ac->clabel->mod_counter;
411 		}
412 		ac = ac->next;
413 	}
414 	if (mod_counter == -1) {
415 		/* mod_counters were all negative!?!?!?
416 		   Ok, we can deal with that. */
417 #if 0
418 		ac = auto_config;
419 		while(ac!=NULL) {
420 			if (ac->clabel->mod_counter > mod_counter) {
421 				mod_counter = ac->clabel->mod_counter;
422 			}
423 			ac = ac->next;
424 		}
425 #endif
426 	}
427 
428 	for (r = 0; r < raidPtr->numRow; r++) {
429 		numFailuresThisRow = 0;
430 		for (c = 0; c < raidPtr->numCol; c++) {
431 			diskPtr = &disks[r][c];
432 
433 			/* find this row/col in the autoconfig */
434 #if DEBUG
435 			printf("Looking for %d,%d in autoconfig\n",r,c);
436 #endif
437 			ac = auto_config;
438 			while(ac!=NULL) {
439 				if (ac->clabel==NULL) {
440 					/* big-time bad news. */
441 					goto fail;
442 				}
443 				if ((ac->clabel->row == r) &&
444 				    (ac->clabel->column == c)) {
445 					/* it's this one... */
446 #if DEBUG
447 					printf("Found: %s at %d,%d\n",
448 					       ac->devname,r,c);
449 #endif
450 
451 					break;
452 				}
453 				ac=ac->next;
454 			}
455 
456 			if (ac!=NULL) {
457 				/* Found it.  Configure it.. */
458 				diskPtr->blockSize = ac->clabel->blockSize;
459 				diskPtr->numBlocks = ac->clabel->numBlocks;
460 				/* Note: rf_protectedSectors is already
461 				   factored into numBlocks here */
462 				raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
463 				raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
464 
465 				memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
466 				       ac->clabel, sizeof(*ac->clabel));
467 				sprintf(diskPtr->devname, "/dev/%s",
468 					ac->devname);
469 
470 				/* note the fact that this component was
471 				   autoconfigured.  You'll need this info
472 				   later.  Trust me :) */
473 				diskPtr->auto_configured = 1;
474 				diskPtr->dev = ac->dev;
475 
476 				/*
477 				 * we allow the user to specify that
478 				 * only a fraction of the disks should
479 				 * be used this is just for debug: it
480 				 * speeds up the parity scan
481 				 */
482 
483 				diskPtr->numBlocks = diskPtr->numBlocks *
484 					rf_sizePercentage / 100;
485 
486 				/* XXX these will get set multiple times,
487 				   but since we're autoconfiguring, they'd
488 				   better be always the same each time!
489 				   If not, this is the least of your worries */
490 
491 				bs = diskPtr->blockSize;
492 				min_numblks = diskPtr->numBlocks;
493 
494 				/* this gets done multiple times, but that's
495 				   fine -- the serial number will be the same
496 				   for all components, guaranteed */
497 				raidPtr->serial_number =
498 					ac->clabel->serial_number;
499 				/* check the last time the label
500 				   was modified */
501 				if (ac->clabel->mod_counter !=
502 				    mod_counter) {
503 					/* Even though we've filled in all
504 					   of the above, we don't trust
505 					   this component since it's
506 					   modification counter is not
507 					   in sync with the rest, and we really
508 					   consider it to be failed.  */
509 					disks[r][c].status = rf_ds_failed;
510 					numFailuresThisRow++;
511 				} else {
512 					if (ac->clabel->clean !=
513 					    RF_RAID_CLEAN) {
514 						parity_good = RF_RAID_DIRTY;
515 					}
516 				}
517 			} else {
518 				/* Didn't find it at all!!
519 				   Component must really be dead */
520 				disks[r][c].status = rf_ds_failed;
521 				numFailuresThisRow++;
522 			}
523 		}
524 		/* XXX fix for n-fault tolerant */
525 		/* XXX this should probably check to see how many failures
526 		   we can handle for this configuration! */
527 		if (numFailuresThisRow > 0)
528 			raidPtr->status[r] = rf_rs_degraded;
529 	}
530 
531 	raidPtr->mod_counter = mod_counter;
532 
533 	/* note the state of the parity, if any */
534 	raidPtr->parity_good = parity_good;
535 	raidPtr->sectorsPerDisk = min_numblks;
536 	raidPtr->logBytesPerSector = ffs(bs) - 1;
537 	raidPtr->bytesPerSector = bs;
538 	raidPtr->sectorMask = bs - 1;
539 	return (0);
540 
541 fail:
542 
543 	rf_UnconfigureVnodes( raidPtr );
544 
545 	return (ret);
546 
547 }
548 
549 /* configure a single disk in the array */
550 int
551 rf_ConfigureDisk(raidPtr, buf, diskPtr, row, col)
552 	RF_Raid_t *raidPtr;
553 	char   *buf;
554 	RF_RaidDisk_t *diskPtr;
555 	RF_RowCol_t row;
556 	RF_RowCol_t col;
557 {
558 	char   *p;
559 	int     retcode;
560 
561 	struct partinfo dpart;
562 	struct vnode *vp;
563 	struct vattr va;
564 	struct proc *proc;
565 	int     error;
566 
567 	retcode = 0;
568 	p = rf_find_non_white(buf);
569 	if (p[strlen(p) - 1] == '\n') {
570 		/* strip off the newline */
571 		p[strlen(p) - 1] = '\0';
572 	}
573 	(void) strcpy(diskPtr->devname, p);
574 
575 	proc = raidPtr->engine_thread;
576 
577 	/* Let's start by claiming the component is fine and well... */
578 	diskPtr->status = rf_ds_optimal;
579 
580 	raidPtr->raid_cinfo[row][col].ci_vp = NULL;
581 	raidPtr->raid_cinfo[row][col].ci_dev = NULL;
582 
583 	error = raidlookup(diskPtr->devname, proc, &vp);
584 	if (error) {
585 		printf("raidlookup on device: %s failed!\n", diskPtr->devname);
586 		if (error == ENXIO) {
587 			/* the component isn't there... must be dead :-( */
588 			diskPtr->status = rf_ds_failed;
589 		} else {
590 			return (error);
591 		}
592 	}
593 	if (diskPtr->status == rf_ds_optimal) {
594 
595 		if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
596 			return (error);
597 		}
598 		error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
599 				  FREAD, proc->p_ucred, proc);
600 		if (error) {
601 			return (error);
602 		}
603 
604 		diskPtr->blockSize = dpart.disklab->d_secsize;
605 
606 		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
607 		diskPtr->partitionSize = dpart.part->p_size;
608 
609 		raidPtr->raid_cinfo[row][col].ci_vp = vp;
610 		raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
611 
612 		/* This component was not automatically configured */
613 		diskPtr->auto_configured = 0;
614 		diskPtr->dev = va.va_rdev;
615 
616 		/* we allow the user to specify that only a fraction of the
617 		 * disks should be used this is just for debug:  it speeds up
618 		 * the parity scan */
619 		diskPtr->numBlocks = diskPtr->numBlocks *
620 			rf_sizePercentage / 100;
621 	}
622 	return (0);
623 }
624 
625 static void rf_print_label_status( RF_Raid_t *, int, int, char *,
626 				  RF_ComponentLabel_t *);
627 
628 static void
629 rf_print_label_status( raidPtr, row, column, dev_name, ci_label )
630 	RF_Raid_t *raidPtr;
631 	int row;
632 	int column;
633 	char *dev_name;
634 	RF_ComponentLabel_t *ci_label;
635 {
636 
637 	printf("raid%d: Component %s being configured at row: %d col: %d\n",
638 	       raidPtr->raidid, dev_name, row, column );
639 	printf("         Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
640 	       ci_label->row, ci_label->column,
641 	       ci_label->num_rows, ci_label->num_columns);
642 	printf("         Version: %d Serial Number: %d Mod Counter: %d\n",
643 	       ci_label->version, ci_label->serial_number,
644 	       ci_label->mod_counter);
645 	printf("         Clean: %s Status: %d\n",
646 	       ci_label->clean ? "Yes" : "No", ci_label->status );
647 }
648 
649 static int rf_check_label_vitals( RF_Raid_t *, int, int, char *,
650 				  RF_ComponentLabel_t *, int, int );
651 static int rf_check_label_vitals( raidPtr, row, column, dev_name, ci_label,
652 				  serial_number, mod_counter )
653 	RF_Raid_t *raidPtr;
654 	int row;
655 	int column;
656 	char *dev_name;
657 	RF_ComponentLabel_t *ci_label;
658 	int serial_number;
659 	int mod_counter;
660 {
661 	int fatal_error = 0;
662 
663 	if (serial_number != ci_label->serial_number) {
664 		printf("%s has a different serial number: %d %d\n",
665 		       dev_name, serial_number, ci_label->serial_number);
666 		fatal_error = 1;
667 	}
668 	if (mod_counter != ci_label->mod_counter) {
669 		printf("%s has a different modfication count: %d %d\n",
670 		       dev_name, mod_counter, ci_label->mod_counter);
671 	}
672 
673 	if (row != ci_label->row) {
674 		printf("Row out of alignment for: %s\n", dev_name);
675 		fatal_error = 1;
676 	}
677 	if (column != ci_label->column) {
678 		printf("Column out of alignment for: %s\n", dev_name);
679 		fatal_error = 1;
680 	}
681 	if (raidPtr->numRow != ci_label->num_rows) {
682 		printf("Number of rows do not match for: %s\n", dev_name);
683 		fatal_error = 1;
684 	}
685 	if (raidPtr->numCol != ci_label->num_columns) {
686 		printf("Number of columns do not match for: %s\n", dev_name);
687 		fatal_error = 1;
688 	}
689 	if (ci_label->clean == 0) {
690 		/* it's not clean, but that's not fatal */
691 		printf("%s is not clean!\n", dev_name);
692 	}
693 	return(fatal_error);
694 }
695 
696 
697 /*
698 
699    rf_CheckLabels() - check all the component labels for consistency.
700    Return an error if there is anything major amiss.
701 
702  */
703 
704 int
705 rf_CheckLabels( raidPtr, cfgPtr )
706 	RF_Raid_t *raidPtr;
707 	RF_Config_t *cfgPtr;
708 {
709 	int r,c;
710 	char *dev_name;
711 	RF_ComponentLabel_t *ci_label;
712 	int serial_number = 0;
713 	int mod_number = 0;
714 	int fatal_error = 0;
715 	int mod_values[4];
716 	int mod_count[4];
717 	int ser_values[4];
718 	int ser_count[4];
719 	int num_ser;
720 	int num_mod;
721 	int i;
722 	int found;
723 	int hosed_row;
724 	int hosed_column;
725 	int too_fatal;
726 	int parity_good;
727 	int force;
728 
729 	hosed_row = -1;
730 	hosed_column = -1;
731 	too_fatal = 0;
732 	force = cfgPtr->force;
733 
734 	/*
735 	   We're going to try to be a little intelligent here.  If one
736 	   component's label is bogus, and we can identify that it's the
737 	   *only* one that's gone, we'll mark it as "failed" and allow
738 	   the configuration to proceed.  This will be the *only* case
739 	   that we'll proceed if there would be (otherwise) fatal errors.
740 
741 	   Basically we simply keep a count of how many components had
742 	   what serial number.  If all but one agree, we simply mark
743 	   the disagreeing component as being failed, and allow
744 	   things to come up "normally".
745 
746 	   We do this first for serial numbers, and then for "mod_counter".
747 
748 	 */
749 
750 	num_ser = 0;
751 	num_mod = 0;
752 	for (r = 0; r < raidPtr->numRow && !fatal_error ; r++) {
753 		for (c = 0; c < raidPtr->numCol; c++) {
754 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
755 			found=0;
756 			for(i=0;i<num_ser;i++) {
757 				if (ser_values[i] == ci_label->serial_number) {
758 					ser_count[i]++;
759 					found=1;
760 					break;
761 				}
762 			}
763 			if (!found) {
764 				ser_values[num_ser] = ci_label->serial_number;
765 				ser_count[num_ser] = 1;
766 				num_ser++;
767 				if (num_ser>2) {
768 					fatal_error = 1;
769 					break;
770 				}
771 			}
772 			found=0;
773 			for(i=0;i<num_mod;i++) {
774 				if (mod_values[i] == ci_label->mod_counter) {
775 					mod_count[i]++;
776 					found=1;
777 					break;
778 				}
779 			}
780 			if (!found) {
781 			        mod_values[num_mod] = ci_label->mod_counter;
782 				mod_count[num_mod] = 1;
783 				num_mod++;
784 				if (num_mod>2) {
785 					fatal_error = 1;
786 					break;
787 				}
788 			}
789 		}
790 	}
791 #if DEBUG
792 	printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
793 	for(i=0;i<num_ser;i++) {
794 		printf("%d %d\n", ser_values[i], ser_count[i]);
795 	}
796 	printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
797 	for(i=0;i<num_mod;i++) {
798 		printf("%d %d\n", mod_values[i], mod_count[i]);
799 	}
800 #endif
801 	serial_number = ser_values[0];
802 	if (num_ser == 2) {
803 		if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
804 			/* Locate the maverick component */
805 			if (ser_count[1] > ser_count[0]) {
806 				serial_number = ser_values[1];
807 			}
808 			for (r = 0; r < raidPtr->numRow; r++) {
809 				for (c = 0; c < raidPtr->numCol; c++) {
810 				ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
811 					if (serial_number !=
812 					    ci_label->serial_number) {
813 						hosed_row = r;
814 						hosed_column = c;
815 						break;
816 					}
817 				}
818 			}
819 			printf("Hosed component: %s\n",
820 			       &cfgPtr->devnames[hosed_row][hosed_column][0]);
821 			if (!force) {
822 				/* we'll fail this component, as if there are
823 				   other major errors, we arn't forcing things
824 				   and we'll abort the config anyways */
825 				raidPtr->Disks[hosed_row][hosed_column].status
826 					= rf_ds_failed;
827 				raidPtr->numFailures++;
828 				raidPtr->status[hosed_row] = rf_rs_degraded;
829 			}
830 		} else {
831 			too_fatal = 1;
832 		}
833 		if (cfgPtr->parityConfig == '0') {
834 			/* We've identified two different serial numbers.
835 			   RAID 0 can't cope with that, so we'll punt */
836 			too_fatal = 1;
837 		}
838 
839 	}
840 
841 	/* record the serial number for later.  If we bail later, setting
842 	   this doesn't matter, otherwise we've got the best guess at the
843 	   correct serial number */
844 	raidPtr->serial_number = serial_number;
845 
846 	mod_number = mod_values[0];
847 	if (num_mod == 2) {
848 		if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
849 			/* Locate the maverick component */
850 			if (mod_count[1] > mod_count[0]) {
851 				mod_number = mod_values[1];
852 			} else if (mod_count[1] < mod_count[0]) {
853 				mod_number = mod_values[0];
854 			} else {
855 				/* counts of different modification values
856 				   are the same.   Assume greater value is
857 				   the correct one, all other things
858 				   considered */
859 				if (mod_values[0] > mod_values[1]) {
860 					mod_number = mod_values[0];
861 				} else {
862 					mod_number = mod_values[1];
863 				}
864 
865 			}
866 			for (r = 0; r < raidPtr->numRow && !too_fatal ; r++) {
867 				for (c = 0; c < raidPtr->numCol; c++) {
868 					ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
869 					if (mod_number !=
870 					    ci_label->mod_counter) {
871 						if ( ( hosed_row == r ) &&
872 						     ( hosed_column == c )) {
873 							/* same one.  Can
874 							   deal with it.  */
875 						} else {
876 							hosed_row = r;
877 							hosed_column = c;
878 							if (num_ser != 1) {
879 								too_fatal = 1;
880 								break;
881 							}
882 						}
883 					}
884 				}
885 			}
886 			printf("Hosed component: %s\n",
887 			       &cfgPtr->devnames[hosed_row][hosed_column][0]);
888 			if (!force) {
889 				/* we'll fail this component, as if there are
890 				   other major errors, we arn't forcing things
891 				   and we'll abort the config anyways */
892 				if (raidPtr->Disks[hosed_row][hosed_column].status != rf_ds_failed) {
893 					raidPtr->Disks[hosed_row][hosed_column].status
894 						= rf_ds_failed;
895 					raidPtr->numFailures++;
896 					raidPtr->status[hosed_row] = rf_rs_degraded;
897 				}
898 			}
899 		} else {
900 			too_fatal = 1;
901 		}
902 		if (cfgPtr->parityConfig == '0') {
903 			/* We've identified two different mod counters.
904 			   RAID 0 can't cope with that, so we'll punt */
905 			too_fatal = 1;
906 		}
907 	}
908 
909 	raidPtr->mod_counter = mod_number;
910 
911 	if (too_fatal) {
912 		/* we've had both a serial number mismatch, and a mod_counter
913 		   mismatch -- and they involved two different components!!
914 		   Bail -- make things fail so that the user must force
915 		   the issue... */
916 		hosed_row = -1;
917 		hosed_column = -1;
918 	}
919 
920 	if (num_ser > 2) {
921 		printf("raid%d: Too many different serial numbers!\n",
922 		       raidPtr->raidid);
923 	}
924 
925 	if (num_mod > 2) {
926 		printf("raid%d: Too many different mod counters!\n",
927 		       raidPtr->raidid);
928 	}
929 
930 	/* we start by assuming the parity will be good, and flee from
931 	   that notion at the slightest sign of trouble */
932 
933 	parity_good = RF_RAID_CLEAN;
934 	for (r = 0; r < raidPtr->numRow; r++) {
935 		for (c = 0; c < raidPtr->numCol; c++) {
936 			dev_name = &cfgPtr->devnames[r][c][0];
937 			ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
938 
939 			if ((r == hosed_row) && (c == hosed_column)) {
940 				printf("raid%d: Ignoring %s\n",
941 				       raidPtr->raidid, dev_name);
942 			} else {
943 				rf_print_label_status( raidPtr, r, c,
944 						       dev_name, ci_label );
945 				if (rf_check_label_vitals( raidPtr, r, c,
946 							   dev_name, ci_label,
947 							   serial_number,
948 							   mod_number )) {
949 					fatal_error = 1;
950 				}
951 				if (ci_label->clean != RF_RAID_CLEAN) {
952 					parity_good = RF_RAID_DIRTY;
953 				}
954 			}
955 		}
956 	}
957 	if (fatal_error) {
958 		parity_good = RF_RAID_DIRTY;
959 	}
960 
961 	/* we note the state of the parity */
962 	raidPtr->parity_good = parity_good;
963 
964 	return(fatal_error);
965 }
966 
967 int rf_add_hot_spare(RF_Raid_t *, RF_SingleComponent_t *);
968 int
969 rf_add_hot_spare(raidPtr, sparePtr)
970 	RF_Raid_t *raidPtr;
971 	RF_SingleComponent_t *sparePtr;
972 {
973 	RF_RaidDisk_t *disks;
974 	RF_DiskQueue_t *spareQueues;
975 	int ret;
976 	unsigned int bs;
977 	int spare_number;
978 
979 	printf("Just in rf_add_hot_spare: %d\n",raidPtr->numSpare);
980 	printf("Num col: %d\n",raidPtr->numCol);
981 	if (raidPtr->numSpare >= RF_MAXSPARE) {
982 		RF_ERRORMSG1("Too many spares: %d\n", raidPtr->numSpare);
983 		return(EINVAL);
984 	}
985 
986 	RF_LOCK_MUTEX(raidPtr->mutex);
987 
988 	/* the beginning of the spares... */
989 	disks = &raidPtr->Disks[0][raidPtr->numCol];
990 
991 	spare_number = raidPtr->numSpare;
992 
993 	ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
994 			       &disks[spare_number], 0,
995 			       raidPtr->numCol + spare_number);
996 
997 	if (ret)
998 		goto fail;
999 	if (disks[spare_number].status != rf_ds_optimal) {
1000 		RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
1001 			     sparePtr->component_name);
1002 		ret=EINVAL;
1003 		goto fail;
1004 	} else {
1005 		disks[spare_number].status = rf_ds_spare;
1006 		DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d (%ld MB)\n", spare_number,
1007 			 disks[spare_number].devname,
1008 			 (long int) disks[spare_number].numBlocks,
1009 			 disks[spare_number].blockSize,
1010 			 (long int) disks[spare_number].numBlocks *
1011 			 disks[spare_number].blockSize / 1024 / 1024);
1012 	}
1013 
1014 
1015 	/* check sizes and block sizes on the spare disk */
1016 	bs = 1 << raidPtr->logBytesPerSector;
1017 	if (disks[spare_number].blockSize != bs) {
1018 		RF_ERRORMSG3("Block size of %d on spare disk %s is not the same as on other disks (%d)\n", disks[spare_number].blockSize, disks[spare_number].devname, bs);
1019 		ret = EINVAL;
1020 		goto fail;
1021 	}
1022 	if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1023 		RF_ERRORMSG3("Spare disk %s (%d blocks) is too small to serve as a spare (need %ld blocks)\n",
1024 			     disks[spare_number].devname,
1025 			     disks[spare_number].blockSize,
1026 			     (long int) raidPtr->sectorsPerDisk);
1027 		ret = EINVAL;
1028 		goto fail;
1029 	} else {
1030 		if (disks[spare_number].numBlocks >
1031 		    raidPtr->sectorsPerDisk) {
1032 			RF_ERRORMSG2("Warning: truncating spare disk %s to %ld blocks\n", disks[spare_number].devname,
1033 				     (long int) raidPtr->sectorsPerDisk);
1034 
1035 			disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1036 		}
1037 	}
1038 
1039 	spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
1040 	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
1041 				 0, raidPtr->numCol + spare_number,
1042 				 raidPtr->qType,
1043 				 raidPtr->sectorsPerDisk,
1044 				 raidPtr->Disks[0][raidPtr->numCol +
1045 						  spare_number].dev,
1046 				 raidPtr->maxOutstanding,
1047 				 &raidPtr->shutdownList,
1048 				 raidPtr->cleanupList);
1049 
1050 
1051 	raidPtr->numSpare++;
1052 	RF_UNLOCK_MUTEX(raidPtr->mutex);
1053 	return (0);
1054 
1055 fail:
1056 	RF_UNLOCK_MUTEX(raidPtr->mutex);
1057 	return(ret);
1058 }
1059 
1060 int
1061 rf_remove_hot_spare(raidPtr,sparePtr)
1062 	RF_Raid_t *raidPtr;
1063 	RF_SingleComponent_t *sparePtr;
1064 {
1065 	int spare_number;
1066 
1067 
1068 	if (raidPtr->numSpare==0) {
1069 		printf("No spares to remove!\n");
1070 		return(EINVAL);
1071 	}
1072 
1073 	spare_number = sparePtr->column;
1074 
1075 	return(EINVAL); /* XXX not implemented yet */
1076 #if 0
1077 	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1078 		return(EINVAL);
1079 	}
1080 
1081 	/* verify that this spare isn't in use... */
1082 
1083 
1084 
1085 
1086 	/* it's gone.. */
1087 
1088 	raidPtr->numSpare--;
1089 
1090 	return(0);
1091 #endif
1092 }
1093 
1094 
1095