1 /* $NetBSD: rf_netbsdkintf.c,v 1.279 2010/12/22 05:51:19 christos Exp $ */ 2 /*- 3 * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster; Jason R. Thorpe. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 /* 32 * Copyright (c) 1990, 1993 33 * The Regents of the University of California. All rights reserved. 34 * 35 * This code is derived from software contributed to Berkeley by 36 * the Systems Programming Group of the University of Utah Computer 37 * Science Department. 
38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 * 63 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 64 * 65 * @(#)cd.c 8.2 (Berkeley) 11/16/93 66 */ 67 68 /* 69 * Copyright (c) 1988 University of Utah. 70 * 71 * This code is derived from software contributed to Berkeley by 72 * the Systems Programming Group of the University of Utah Computer 73 * Science Department. 
74 * 75 * Redistribution and use in source and binary forms, with or without 76 * modification, are permitted provided that the following conditions 77 * are met: 78 * 1. Redistributions of source code must retain the above copyright 79 * notice, this list of conditions and the following disclaimer. 80 * 2. Redistributions in binary form must reproduce the above copyright 81 * notice, this list of conditions and the following disclaimer in the 82 * documentation and/or other materials provided with the distribution. 83 * 3. All advertising materials mentioning features or use of this software 84 * must display the following acknowledgement: 85 * This product includes software developed by the University of 86 * California, Berkeley and its contributors. 87 * 4. Neither the name of the University nor the names of its contributors 88 * may be used to endorse or promote products derived from this software 89 * without specific prior written permission. 90 * 91 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 92 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 93 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 94 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 95 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 96 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 97 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 98 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 99 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 100 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 101 * SUCH DAMAGE. 102 * 103 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 104 * 105 * @(#)cd.c 8.2 (Berkeley) 11/16/93 106 */ 107 108 /* 109 * Copyright (c) 1995 Carnegie-Mellon University. 110 * All rights reserved. 
111 * 112 * Authors: Mark Holland, Jim Zelenka 113 * 114 * Permission to use, copy, modify and distribute this software and 115 * its documentation is hereby granted, provided that both the copyright 116 * notice and this permission notice appear in all copies of the 117 * software, derivative works or modified versions, and any portions 118 * thereof, and that both notices appear in supporting documentation. 119 * 120 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 121 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 122 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 123 * 124 * Carnegie Mellon requests users of this software to return to 125 * 126 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 127 * School of Computer Science 128 * Carnegie Mellon University 129 * Pittsburgh PA 15213-3890 130 * 131 * any improvements or extensions that they make and grant Carnegie the 132 * rights to redistribute these changes. 
133 */ 134 135 /*********************************************************** 136 * 137 * rf_kintf.c -- the kernel interface routines for RAIDframe 138 * 139 ***********************************************************/ 140 141 #include <sys/cdefs.h> 142 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.279 2010/12/22 05:51:19 christos Exp $"); 143 144 #ifdef _KERNEL_OPT 145 #include "opt_compat_netbsd.h" 146 #include "opt_raid_autoconfig.h" 147 #include "raid.h" 148 #endif 149 150 #include <sys/param.h> 151 #include <sys/errno.h> 152 #include <sys/pool.h> 153 #include <sys/proc.h> 154 #include <sys/queue.h> 155 #include <sys/disk.h> 156 #include <sys/device.h> 157 #include <sys/stat.h> 158 #include <sys/ioctl.h> 159 #include <sys/fcntl.h> 160 #include <sys/systm.h> 161 #include <sys/vnode.h> 162 #include <sys/disklabel.h> 163 #include <sys/conf.h> 164 #include <sys/buf.h> 165 #include <sys/bufq.h> 166 #include <sys/reboot.h> 167 #include <sys/kauth.h> 168 169 #include <prop/proplib.h> 170 171 #include <dev/raidframe/raidframevar.h> 172 #include <dev/raidframe/raidframeio.h> 173 #include <dev/raidframe/rf_paritymap.h> 174 175 #include "rf_raid.h" 176 #include "rf_copyback.h" 177 #include "rf_dag.h" 178 #include "rf_dagflags.h" 179 #include "rf_desc.h" 180 #include "rf_diskqueue.h" 181 #include "rf_etimer.h" 182 #include "rf_general.h" 183 #include "rf_kintf.h" 184 #include "rf_options.h" 185 #include "rf_driver.h" 186 #include "rf_parityscan.h" 187 #include "rf_threadstuff.h" 188 189 #ifdef COMPAT_50 190 #include "rf_compat50.h" 191 #endif 192 193 #ifdef DEBUG 194 int rf_kdebug_level = 0; 195 #define db1_printf(a) if (rf_kdebug_level > 0) printf a 196 #else /* DEBUG */ 197 #define db1_printf(a) { } 198 #endif /* DEBUG */ 199 200 static RF_Raid_t **raidPtrs; /* global raid device descriptors */ 201 202 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 203 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex) 204 205 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install 
a 206 * spare table */ 207 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from 208 * installation process */ 209 #endif 210 211 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures"); 212 213 /* prototypes */ 214 static void KernelWakeupFunc(struct buf *); 215 static void InitBP(struct buf *, struct vnode *, unsigned, 216 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *), 217 void *, int, struct proc *); 218 static void raidinit(RF_Raid_t *); 219 220 void raidattach(int); 221 static int raid_match(device_t, cfdata_t, void *); 222 static void raid_attach(device_t, device_t, void *); 223 static int raid_detach(device_t, int); 224 225 static int raidread_component_area(dev_t, struct vnode *, void *, size_t, 226 daddr_t, daddr_t); 227 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t, 228 daddr_t, daddr_t, int); 229 230 static int raidwrite_component_label(unsigned, 231 dev_t, struct vnode *, RF_ComponentLabel_t *); 232 static int raidread_component_label(unsigned, 233 dev_t, struct vnode *, RF_ComponentLabel_t *); 234 235 236 dev_type_open(raidopen); 237 dev_type_close(raidclose); 238 dev_type_read(raidread); 239 dev_type_write(raidwrite); 240 dev_type_ioctl(raidioctl); 241 dev_type_strategy(raidstrategy); 242 dev_type_dump(raiddump); 243 dev_type_size(raidsize); 244 245 const struct bdevsw raid_bdevsw = { 246 raidopen, raidclose, raidstrategy, raidioctl, 247 raiddump, raidsize, D_DISK 248 }; 249 250 const struct cdevsw raid_cdevsw = { 251 raidopen, raidclose, raidread, raidwrite, raidioctl, 252 nostop, notty, nopoll, nommap, nokqfilter, D_DISK 253 }; 254 255 static struct dkdriver rf_dkdriver = { raidstrategy, minphys }; 256 257 /* XXX Not sure if the following should be replacing the raidPtrs above, 258 or if it should be used in conjunction with that... 
259 */ 260 261 struct raid_softc { 262 device_t sc_dev; 263 int sc_flags; /* flags */ 264 int sc_cflags; /* configuration flags */ 265 uint64_t sc_size; /* size of the raid device */ 266 char sc_xname[20]; /* XXX external name */ 267 struct disk sc_dkdev; /* generic disk device info */ 268 struct bufq_state *buf_queue; /* used for the device queue */ 269 }; 270 /* sc_flags */ 271 #define RAIDF_INITED 0x01 /* unit has been initialized */ 272 #define RAIDF_WLABEL 0x02 /* label area is writable */ 273 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */ 274 #define RAIDF_SHUTDOWN 0x08 /* unit is being shutdown */ 275 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */ 276 #define RAIDF_LOCKED 0x80 /* unit is locked */ 277 278 #define raidunit(x) DISKUNIT(x) 279 int numraid = 0; 280 281 extern struct cfdriver raid_cd; 282 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc), 283 raid_match, raid_attach, raid_detach, NULL, NULL, NULL, 284 DVF_DETACH_SHUTDOWN); 285 286 /* 287 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device. 288 * Be aware that large numbers can allow the driver to consume a lot of 289 * kernel memory, especially on writes, and in degraded mode reads. 290 * 291 * For example: with a stripe width of 64 blocks (32k) and 5 disks, 292 * a single 64K write will typically require 64K for the old data, 293 * 64K for the old parity, and 64K for the new parity, for a total 294 * of 192K (if the parity buffer is not re-used immediately). 295 * Even it if is used immediately, that's still 128K, which when multiplied 296 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data. 297 * 298 * Now in degraded mode, for example, a 64K read on the above setup may 299 * require data reconstruction, which will require *all* of the 4 remaining 300 * disks to participate -- 4 * 32K/disk == 128K again. 
301 */ 302 303 #ifndef RAIDOUTSTANDING 304 #define RAIDOUTSTANDING 6 305 #endif 306 307 #define RAIDLABELDEV(dev) \ 308 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART)) 309 310 /* declared here, and made public, for the benefit of KVM stuff.. */ 311 struct raid_softc *raid_softc; 312 313 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *, 314 struct disklabel *); 315 static void raidgetdisklabel(dev_t); 316 static void raidmakedisklabel(struct raid_softc *); 317 318 static int raidlock(struct raid_softc *); 319 static void raidunlock(struct raid_softc *); 320 321 static int raid_detach_unlocked(struct raid_softc *); 322 323 static void rf_markalldirty(RF_Raid_t *); 324 static void rf_set_properties(struct raid_softc *, RF_Raid_t *); 325 326 void rf_ReconThread(struct rf_recon_req *); 327 void rf_RewriteParityThread(RF_Raid_t *raidPtr); 328 void rf_CopybackThread(RF_Raid_t *raidPtr); 329 void rf_ReconstructInPlaceThread(struct rf_recon_req *); 330 int rf_autoconfig(device_t); 331 void rf_buildroothack(RF_ConfigSet_t *); 332 333 RF_AutoConfig_t *rf_find_raid_components(void); 334 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *); 335 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *); 336 static int rf_reasonable_label(RF_ComponentLabel_t *); 337 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *); 338 int rf_set_autoconfig(RF_Raid_t *, int); 339 int rf_set_rootpartition(RF_Raid_t *, int); 340 void rf_release_all_vps(RF_ConfigSet_t *); 341 void rf_cleanup_config_set(RF_ConfigSet_t *); 342 int rf_have_enough_components(RF_ConfigSet_t *); 343 int rf_auto_config_set(RF_ConfigSet_t *, int *); 344 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t); 345 346 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not 347 allow autoconfig to take place. 348 Note that this is overridden by having 349 RAID_AUTOCONFIG as an option in the 350 kernel config file. 
*/ 351 352 struct RF_Pools_s rf_pools; 353 354 void 355 raidattach(int num) 356 { 357 int raidID; 358 int i, rc; 359 360 aprint_debug("raidattach: Asked for %d units\n", num); 361 362 if (num <= 0) { 363 #ifdef DIAGNOSTIC 364 panic("raidattach: count <= 0"); 365 #endif 366 return; 367 } 368 /* This is where all the initialization stuff gets done. */ 369 370 numraid = num; 371 372 /* Make some space for requested number of units... */ 373 374 RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **)); 375 if (raidPtrs == NULL) { 376 panic("raidPtrs is NULL!!"); 377 } 378 379 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 380 rf_mutex_init(&rf_sparet_wait_mutex); 381 382 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; 383 #endif 384 385 for (i = 0; i < num; i++) 386 raidPtrs[i] = NULL; 387 rc = rf_BootRaidframe(); 388 if (rc == 0) 389 aprint_verbose("Kernelized RAIDframe activated\n"); 390 else 391 panic("Serious error booting RAID!!"); 392 393 /* put together some datastructures like the CCD device does.. This 394 * lets us lock the device and what-not when it gets opened. 
*/ 395 396 raid_softc = (struct raid_softc *) 397 malloc(num * sizeof(struct raid_softc), 398 M_RAIDFRAME, M_NOWAIT); 399 if (raid_softc == NULL) { 400 aprint_error("WARNING: no memory for RAIDframe driver\n"); 401 return; 402 } 403 404 memset(raid_softc, 0, num * sizeof(struct raid_softc)); 405 406 for (raidID = 0; raidID < num; raidID++) { 407 bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0); 408 409 RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t), 410 (RF_Raid_t *)); 411 if (raidPtrs[raidID] == NULL) { 412 aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID); 413 numraid = raidID; 414 return; 415 } 416 } 417 418 if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) { 419 aprint_error("raidattach: config_cfattach_attach failed?\n"); 420 } 421 422 #ifdef RAID_AUTOCONFIG 423 raidautoconfig = 1; 424 #endif 425 426 /* 427 * Register a finalizer which will be used to auto-config RAID 428 * sets once all real hardware devices have been found. 429 */ 430 if (config_finalize_register(NULL, rf_autoconfig) != 0) 431 aprint_error("WARNING: unable to register RAIDframe finalizer\n"); 432 } 433 434 int 435 rf_autoconfig(device_t self) 436 { 437 RF_AutoConfig_t *ac_list; 438 RF_ConfigSet_t *config_sets; 439 440 if (raidautoconfig == 0) 441 return (0); 442 443 /* XXX This code can only be run once. */ 444 raidautoconfig = 0; 445 446 /* 1. locate all RAID components on the system */ 447 aprint_debug("Searching for RAID components...\n"); 448 ac_list = rf_find_raid_components(); 449 450 /* 2. Sort them into their respective sets. */ 451 config_sets = rf_create_auto_sets(ac_list); 452 453 /* 454 * 3. Evaluate each set andconfigure the valid ones. 455 * This gets done in rf_buildroothack(). 
456 */ 457 rf_buildroothack(config_sets); 458 459 return 1; 460 } 461 462 void 463 rf_buildroothack(RF_ConfigSet_t *config_sets) 464 { 465 RF_ConfigSet_t *cset; 466 RF_ConfigSet_t *next_cset; 467 int retcode; 468 int raidID; 469 int rootID; 470 int col; 471 int num_root; 472 char *devname; 473 474 rootID = 0; 475 num_root = 0; 476 cset = config_sets; 477 while (cset != NULL) { 478 next_cset = cset->next; 479 if (rf_have_enough_components(cset) && 480 cset->ac->clabel->autoconfigure==1) { 481 retcode = rf_auto_config_set(cset,&raidID); 482 if (!retcode) { 483 aprint_debug("raid%d: configured ok\n", raidID); 484 if (cset->rootable) { 485 rootID = raidID; 486 num_root++; 487 } 488 } else { 489 /* The autoconfig didn't work :( */ 490 aprint_debug("Autoconfig failed with code %d for raid%d\n", retcode, raidID); 491 rf_release_all_vps(cset); 492 } 493 } else { 494 /* we're not autoconfiguring this set... 495 release the associated resources */ 496 rf_release_all_vps(cset); 497 } 498 /* cleanup */ 499 rf_cleanup_config_set(cset); 500 cset = next_cset; 501 } 502 503 /* if the user has specified what the root device should be 504 then we don't touch booted_device or boothowto... */ 505 506 if (rootspec != NULL) 507 return; 508 509 /* we found something bootable... */ 510 511 if (num_root == 1) { 512 booted_device = raid_softc[rootID].sc_dev; 513 } else if (num_root > 1) { 514 515 /* 516 * Maybe the MD code can help. 
If it cannot, then 517 * setroot() will discover that we have no 518 * booted_device and will ask the user if nothing was 519 * hardwired in the kernel config file 520 */ 521 522 if (booted_device == NULL) 523 cpu_rootconf(); 524 if (booted_device == NULL) 525 return; 526 527 num_root = 0; 528 for (raidID = 0; raidID < numraid; raidID++) { 529 if (raidPtrs[raidID]->valid == 0) 530 continue; 531 532 if (raidPtrs[raidID]->root_partition == 0) 533 continue; 534 535 for (col = 0; col < raidPtrs[raidID]->numCol; col++) { 536 devname = raidPtrs[raidID]->Disks[col].devname; 537 devname += sizeof("/dev/") - 1; 538 if (strncmp(devname, device_xname(booted_device), 539 strlen(device_xname(booted_device))) != 0) 540 continue; 541 aprint_debug("raid%d includes boot device %s\n", 542 raidID, devname); 543 num_root++; 544 rootID = raidID; 545 } 546 } 547 548 if (num_root == 1) { 549 booted_device = raid_softc[rootID].sc_dev; 550 } else { 551 /* we can't guess.. require the user to answer... */ 552 boothowto |= RB_ASKNAME; 553 } 554 } 555 } 556 557 558 int 559 raidsize(dev_t dev) 560 { 561 struct raid_softc *rs; 562 struct disklabel *lp; 563 int part, unit, omask, size; 564 565 unit = raidunit(dev); 566 if (unit >= numraid) 567 return (-1); 568 rs = &raid_softc[unit]; 569 570 if ((rs->sc_flags & RAIDF_INITED) == 0) 571 return (-1); 572 573 part = DISKPART(dev); 574 omask = rs->sc_dkdev.dk_openmask & (1 << part); 575 lp = rs->sc_dkdev.dk_label; 576 577 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp)) 578 return (-1); 579 580 if (lp->d_partitions[part].p_fstype != FS_SWAP) 581 size = -1; 582 else 583 size = lp->d_partitions[part].p_size * 584 (lp->d_secsize / DEV_BSIZE); 585 586 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp)) 587 return (-1); 588 589 return (size); 590 591 } 592 593 int 594 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size) 595 { 596 int unit = raidunit(dev); 597 struct raid_softc *rs; 598 const struct bdevsw *bdev; 599 struct disklabel *lp; 600 
RF_Raid_t *raidPtr; 601 daddr_t offset; 602 int part, c, sparecol, j, scol, dumpto; 603 int error = 0; 604 605 if (unit >= numraid) 606 return (ENXIO); 607 608 rs = &raid_softc[unit]; 609 raidPtr = raidPtrs[unit]; 610 611 if ((rs->sc_flags & RAIDF_INITED) == 0) 612 return ENXIO; 613 614 /* we only support dumping to RAID 1 sets */ 615 if (raidPtr->Layout.numDataCol != 1 || 616 raidPtr->Layout.numParityCol != 1) 617 return EINVAL; 618 619 620 if ((error = raidlock(rs)) != 0) 621 return error; 622 623 if (size % DEV_BSIZE != 0) { 624 error = EINVAL; 625 goto out; 626 } 627 628 if (blkno + size / DEV_BSIZE > rs->sc_size) { 629 printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > " 630 "sc->sc_size (%" PRIu64 ")\n", __func__, blkno, 631 size / DEV_BSIZE, rs->sc_size); 632 error = EINVAL; 633 goto out; 634 } 635 636 part = DISKPART(dev); 637 lp = rs->sc_dkdev.dk_label; 638 offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS; 639 640 /* figure out what device is alive.. */ 641 642 /* 643 Look for a component to dump to. The preference for the 644 component to dump to is as follows: 645 1) the master 646 2) a used_spare of the master 647 3) the slave 648 4) a used_spare of the slave 649 */ 650 651 dumpto = -1; 652 for (c = 0; c < raidPtr->numCol; c++) { 653 if (raidPtr->Disks[c].status == rf_ds_optimal) { 654 /* this might be the one */ 655 dumpto = c; 656 break; 657 } 658 } 659 660 /* 661 At this point we have possibly selected a live master or a 662 live slave. We now check to see if there is a spared 663 master (or a spared slave), if we didn't find a live master 664 or a live slave. 665 */ 666 667 for (c = 0; c < raidPtr->numSpare; c++) { 668 sparecol = raidPtr->numCol + c; 669 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 670 /* How about this one? 
*/ 671 scol = -1; 672 for(j=0;j<raidPtr->numCol;j++) { 673 if (raidPtr->Disks[j].spareCol == sparecol) { 674 scol = j; 675 break; 676 } 677 } 678 if (scol == 0) { 679 /* 680 We must have found a spared master! 681 We'll take that over anything else 682 found so far. (We couldn't have 683 found a real master before, since 684 this is a used spare, and it's 685 saying that it's replacing the 686 master.) On reboot (with 687 autoconfiguration turned on) 688 sparecol will become the 1st 689 component (component0) of this set. 690 */ 691 dumpto = sparecol; 692 break; 693 } else if (scol != -1) { 694 /* 695 Must be a spared slave. We'll dump 696 to that if we havn't found anything 697 else so far. 698 */ 699 if (dumpto == -1) 700 dumpto = sparecol; 701 } 702 } 703 } 704 705 if (dumpto == -1) { 706 /* we couldn't find any live components to dump to!?!? 707 */ 708 error = EINVAL; 709 goto out; 710 } 711 712 bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev); 713 714 /* 715 Note that blkno is relative to this particular partition. 716 By adding the offset of this partition in the RAID 717 set, and also adding RF_PROTECTED_SECTORS, we get a 718 value that is relative to the partition used for the 719 underlying component. 720 */ 721 722 error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev, 723 blkno + offset, va, size); 724 725 out: 726 raidunlock(rs); 727 728 return error; 729 } 730 /* ARGSUSED */ 731 int 732 raidopen(dev_t dev, int flags, int fmt, 733 struct lwp *l) 734 { 735 int unit = raidunit(dev); 736 struct raid_softc *rs; 737 struct disklabel *lp; 738 int part, pmask; 739 int error = 0; 740 741 if (unit >= numraid) 742 return (ENXIO); 743 rs = &raid_softc[unit]; 744 745 if ((error = raidlock(rs)) != 0) 746 return (error); 747 748 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) { 749 error = EBUSY; 750 goto bad; 751 } 752 753 lp = rs->sc_dkdev.dk_label; 754 755 part = DISKPART(dev); 756 757 /* 758 * If there are wedges, and this is not RAW_PART, then we 759 * need to fail. 
760 */ 761 if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) { 762 error = EBUSY; 763 goto bad; 764 } 765 pmask = (1 << part); 766 767 if ((rs->sc_flags & RAIDF_INITED) && 768 (rs->sc_dkdev.dk_openmask == 0)) 769 raidgetdisklabel(dev); 770 771 /* make sure that this partition exists */ 772 773 if (part != RAW_PART) { 774 if (((rs->sc_flags & RAIDF_INITED) == 0) || 775 ((part >= lp->d_npartitions) || 776 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 777 error = ENXIO; 778 goto bad; 779 } 780 } 781 /* Prevent this unit from being unconfigured while open. */ 782 switch (fmt) { 783 case S_IFCHR: 784 rs->sc_dkdev.dk_copenmask |= pmask; 785 break; 786 787 case S_IFBLK: 788 rs->sc_dkdev.dk_bopenmask |= pmask; 789 break; 790 } 791 792 if ((rs->sc_dkdev.dk_openmask == 0) && 793 ((rs->sc_flags & RAIDF_INITED) != 0)) { 794 /* First one... mark things as dirty... Note that we *MUST* 795 have done a configure before this. I DO NOT WANT TO BE 796 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED 797 THAT THEY BELONG TOGETHER!!!!! */ 798 /* XXX should check to see if we're only open for reading 799 here... If so, we needn't do this, but then need some 800 other way of keeping track of what's happened.. */ 801 802 rf_markalldirty(raidPtrs[unit]); 803 } 804 805 806 rs->sc_dkdev.dk_openmask = 807 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask; 808 809 bad: 810 raidunlock(rs); 811 812 return (error); 813 814 815 } 816 /* ARGSUSED */ 817 int 818 raidclose(dev_t dev, int flags, int fmt, struct lwp *l) 819 { 820 int unit = raidunit(dev); 821 struct raid_softc *rs; 822 int error = 0; 823 int part; 824 825 if (unit >= numraid) 826 return (ENXIO); 827 rs = &raid_softc[unit]; 828 829 if ((error = raidlock(rs)) != 0) 830 return (error); 831 832 part = DISKPART(dev); 833 834 /* ...that much closer to allowing unconfiguration... 
*/ 835 switch (fmt) { 836 case S_IFCHR: 837 rs->sc_dkdev.dk_copenmask &= ~(1 << part); 838 break; 839 840 case S_IFBLK: 841 rs->sc_dkdev.dk_bopenmask &= ~(1 << part); 842 break; 843 } 844 rs->sc_dkdev.dk_openmask = 845 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask; 846 847 if ((rs->sc_dkdev.dk_openmask == 0) && 848 ((rs->sc_flags & RAIDF_INITED) != 0)) { 849 /* Last one... device is not unconfigured yet. 850 Device shutdown has taken care of setting the 851 clean bits if RAIDF_INITED is not set 852 mark things as clean... */ 853 854 rf_update_component_labels(raidPtrs[unit], 855 RF_FINAL_COMPONENT_UPDATE); 856 857 /* If the kernel is shutting down, it will detach 858 * this RAID set soon enough. 859 */ 860 } 861 862 raidunlock(rs); 863 return (0); 864 865 } 866 867 void 868 raidstrategy(struct buf *bp) 869 { 870 int s; 871 872 unsigned int raidID = raidunit(bp->b_dev); 873 RF_Raid_t *raidPtr; 874 struct raid_softc *rs = &raid_softc[raidID]; 875 int wlabel; 876 877 if ((rs->sc_flags & RAIDF_INITED) ==0) { 878 bp->b_error = ENXIO; 879 goto done; 880 } 881 if (raidID >= numraid || !raidPtrs[raidID]) { 882 bp->b_error = ENODEV; 883 goto done; 884 } 885 raidPtr = raidPtrs[raidID]; 886 if (!raidPtr->valid) { 887 bp->b_error = ENODEV; 888 goto done; 889 } 890 if (bp->b_bcount == 0) { 891 db1_printf(("b_bcount is zero..\n")); 892 goto done; 893 } 894 895 /* 896 * Do bounds checking and adjust transfer. If there's an 897 * error, the bounds check will flag that for us. 
898 */ 899 900 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING); 901 if (DISKPART(bp->b_dev) == RAW_PART) { 902 uint64_t size; /* device size in DEV_BSIZE unit */ 903 904 if (raidPtr->logBytesPerSector > DEV_BSHIFT) { 905 size = raidPtr->totalSectors << 906 (raidPtr->logBytesPerSector - DEV_BSHIFT); 907 } else { 908 size = raidPtr->totalSectors >> 909 (DEV_BSHIFT - raidPtr->logBytesPerSector); 910 } 911 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) { 912 goto done; 913 } 914 } else { 915 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) { 916 db1_printf(("Bounds check failed!!:%d %d\n", 917 (int) bp->b_blkno, (int) wlabel)); 918 goto done; 919 } 920 } 921 s = splbio(); 922 923 bp->b_resid = 0; 924 925 /* stuff it onto our queue */ 926 bufq_put(rs->buf_queue, bp); 927 928 /* scheduled the IO to happen at the next convenient time */ 929 wakeup(&(raidPtrs[raidID]->iodone)); 930 931 splx(s); 932 return; 933 934 done: 935 bp->b_resid = bp->b_bcount; 936 biodone(bp); 937 } 938 /* ARGSUSED */ 939 int 940 raidread(dev_t dev, struct uio *uio, int flags) 941 { 942 int unit = raidunit(dev); 943 struct raid_softc *rs; 944 945 if (unit >= numraid) 946 return (ENXIO); 947 rs = &raid_softc[unit]; 948 949 if ((rs->sc_flags & RAIDF_INITED) == 0) 950 return (ENXIO); 951 952 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio)); 953 954 } 955 /* ARGSUSED */ 956 int 957 raidwrite(dev_t dev, struct uio *uio, int flags) 958 { 959 int unit = raidunit(dev); 960 struct raid_softc *rs; 961 962 if (unit >= numraid) 963 return (ENXIO); 964 rs = &raid_softc[unit]; 965 966 if ((rs->sc_flags & RAIDF_INITED) == 0) 967 return (ENXIO); 968 969 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio)); 970 971 } 972 973 static int 974 raid_detach_unlocked(struct raid_softc *rs) 975 { 976 int error; 977 RF_Raid_t *raidPtr; 978 979 raidPtr = raidPtrs[device_unit(rs->sc_dev)]; 980 981 /* 982 * If somebody has a partition mounted, we shouldn't 983 * 
	   shutdown. */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	return 0;
}

/*
 * raidioctl -- ioctl entry point for raid(4) devices.
 *
 * Handles both RAIDframe-specific commands (configure, shutdown,
 * component-label manipulation, fail/rebuild/copyback, status queries)
 * and the standard disk ioctls (disklabel, wedges, cache sync).
 * Commands that modify state require FWRITE; most RAIDframe commands
 * additionally require the unit to be RAIDF_INITED (see the two guard
 * switches below).  Returns 0 on success or an errno value.
 */
int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int     unit = raidunit(dev);
	int     error = 0;
	int     part, pmask;
	cfdata_t cf;
	struct raid_softc *rs;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, **ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
/*	int raidid; */
	struct rf_recon_req *rrcopy, *rr;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_ComponentLabel_t **clabel_ptr;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t component;
	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
	int i, j, d;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif
	struct dkwedge_info *dkw;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];
	raidPtr = raidPtrs[unit];

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
		    (int) DISKPART(dev), (int) unit, cmd));

	/* Must be open for writes for these commands... */
	switch (cmd) {
#ifdef DIOCGSECTORSIZE
	case DIOCGSECTORSIZE:
		*(u_int *)data = raidPtr->bytesPerSector;
		return 0;
	case DIOCGMEDIASIZE:
		*(off_t *)data =
		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
		return 0;
#endif
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	case DIOCWLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCWDINFO:
	case ODIOCSDINFO:
	case ODIOCGDEFLABEL:
#endif
	case DIOCGPART:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCLWEDGES:
	case DIOCCACHESYNC:
	case RAIDFRAME_SHUTDOWN:
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
#ifdef COMPAT_50
	case RAIDFRAME_GET_INFO50:
		return rf_get_info50(raidPtr, data);

	case RAIDFRAME_CONFIGURE50:
		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
			return retcode;
		goto config;
#endif
		/* configure the system */
	case RAIDFRAME_CONFIGURE:

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n",unit);
			return(EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

		u_cfg = *((RF_Config_t **) data);
		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
				retcode));
			return (retcode);
		}
		goto config;
	config:
		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				return (EINVAL);
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				return (ENOMEM);
			}
			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
					k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
					retcode));
				return (retcode);
			}
		} else
			specific_buf = NULL;
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 * there is no stale data left in the case of a
		 * reconfiguration
		 */
		memset(raidPtr, 0, sizeof(*raidPtr));
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(raidPtr);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((error = raidlock(rs)) != 0)
			return (error);

		/*
		 * Refuse if any partition other than this one is open, or
		 * if this one is open both block and character.
		 */
		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
		     (rs->sc_dkdev.dk_copenmask & pmask)))
			retcode = EBUSY;
		else {
			rs->sc_flags |= RAIDF_SHUTDOWN;
			rs->sc_dkdev.dk_copenmask &= ~pmask;
			rs->sc_dkdev.dk_bopenmask &= ~pmask;
			rs->sc_dkdev.dk_openmask &= ~pmask;
			retcode = 0;
		}

		raidunlock(rs);

		if (retcode != 0)
			return retcode;

		/* free the pseudo device attach bits */

		cf = device_cfdata(rs->sc_dev);
		if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
			free(cf, M_RAIDFRAME);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		clabel_ptr = (RF_ComponentLabel_t **) data;
		/* need to read the component label for the disk indicated
		   by row,column in clabel */

		/*
		 * Perhaps there should be an option to skip the in-core
		 * copy and hit the disk, as with disklabel(8).
		 */
		RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));

		/* copy in the user's label only to learn which column they
		 * want; the temporary is freed before the copyout below */
		retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));

		if (retcode) {
			RF_Free(clabel, sizeof(*clabel));
			return retcode;
		}

		clabel->row = 0; /* Don't allow looking at anything else.*/

		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol +
		    raidPtr->numSpare)) {
			RF_Free(clabel, sizeof(*clabel));
			return EINVAL;
		}

		RF_Free(clabel, sizeof(*clabel));

		clabel = raidget_component_label(raidPtr, column);

		return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));

#if 0
	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		raidid = raidPtr->raidid;
#ifdef DEBUG
		printf("raid%d: Got component label:\n", raidid);
		printf("raid%d: Version: %d\n", raidid, clabel->version);
		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
		printf("raid%d: Column: %d\n", raidid, clabel->column);
		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
		printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif
		clabel->row = 0;
		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
		memcpy(raidget_component_label(raidPtr, column),
		    clabel, sizeof(*clabel));
		raidflush_component_label(raidPtr, column);
		return (0);
#endif

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		   */

		raidPtr->serial_number = clabel->serial_number;

		for(column=0;column<raidPtr->numCol;column++) {
			diskPtr = &raidPtr->Disks[column];
			if (!RF_DEAD_DISK(diskPtr->status)) {
				ci_label = raidget_component_label(raidPtr,
				    column);
				/* Zeroing this is important. */
				memset(ci_label, 0, sizeof(*ci_label));
				raid_init_component_label(raidPtr, ci_label);
				ci_label->serial_number =
				    raidPtr->serial_number;
				ci_label->row = 0; /* we don't pretend to support more */
				ci_label->partitionSize =
				    diskPtr->partitionSize;
				ci_label->column = column;
				raidflush_component_label(raidPtr, column);
			}
			/* XXXjld what about the spares? */
		}

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return(0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return(EINVAL);
		}

		/* the rewrite runs asynchronously in a kernel thread */
		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
					   rf_RewriteParityThread,
					   raidPtr,"raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		/* not implemented: returns retcode (still 0) without
		 * touching anything */
		return(retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return(EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		component.row = 0; /* we don't support any more */
		column = component.column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		RF_LOCK_MUTEX(raidPtr->mutex);
		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant!!! */
			/* some component other than this has failed.
			   Let's not make things worse than they already
			   are... */
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d: Col: %d Too many failures.\n",
			       raidPtr->raidid, column);
			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status ==
		    rf_ds_reconstructing) {
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);

			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status == rf_ds_spared) {
			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (EINVAL);
		}
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* NOTE(review): the status checks above are done under the
		 * mutex but the thread is created after it is dropped, so a
		 * racing state change is possible here -- confirm this is
		 * handled in the recon thread itself */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);

		rrcopy->raidPtr = (void *) raidPtr;
		rrcopy->col = column;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconstructInPlaceThread,
					   rrcopy,"raid_reconip");
		return(retcode);

	case RAIDFRAME_GET_INFO:
		if (!raidPtr->valid)
			return (ENODEV);
		ucfgp = (RF_DeviceConfig_t **) data;
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
			  (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		d_cfg->rows = 1; /* there is only 1 row now */
		d_cfg->cols = raidPtr->numCol;
		d_cfg->ndevs = raidPtr->numCol;
		if (d_cfg->ndevs >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->nspares = raidPtr->numSpare;
		if (d_cfg->nspares >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
		d = 0;
		for (j = 0; j < d_cfg->cols; j++) {
			d_cfg->devs[d] = raidPtr->Disks[j];
			d++;
		}
		/* spares live immediately after the data columns in Disks[] */
		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
			d_cfg->spares[i] = raidPtr->Disks[j];
		}
		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map,
		    (struct rf_pmstat *)data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
			(struct rf_pmparams *)data, 1))
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		/* NOTE(review): totalSectors is narrowed to int here --
		 * overflows for very large sets; confirm callers use the
		 * 64-bit DIOCGMEDIASIZE path instead */
		*(int *) data = raidPtr->totalSectors;
		return (0);

		/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		rr = (struct rf_recon_req *) data;
		rr->row = 0;
		if (rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);


		RF_LOCK_MUTEX(raidPtr->mutex);
		if (raidPtr->status == rf_rs_reconstructing) {
			/* you can't fail a disk while we're reconstructing! */
			/* XXX wrong for RAID6 */
			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (EINVAL);
		}
		if ((raidPtr->Disks[rr->col].status ==
		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
			/* some other component has failed.  Let's not make
			   things worse. XXX wrong for RAID6 */
			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
			/* Can't fail a spared disk! */
			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (EINVAL);
		}
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);
		memcpy(rrcopy, rr, sizeof(*rr));
		rrcopy->raidPtr = (void *) raidPtr;

		/* NOTE(review): returns 0 unconditionally here, discarding
		 * the RF_CREATE_THREAD result in retcode -- verify intent */
		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconThread,
					   rrcopy,"raid_recon");
		return (0);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
					   rf_CopybackThread,
					   raidPtr,"raid_copyback");
		return (retcode);

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->status != rf_rs_reconstructing)
			*(int *) data = 100;
		else {
			if (raidPtr->reconControl->numRUsTotal > 0) {
				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
			} else {
				*(int *) data = 0;
			}
		}
		return (0);
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->status != rf_rs_reconstructing) {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		} else {
			progressInfo.total =
				raidPtr->reconControl->numRUsTotal;
			progressInfo.completed =
				raidPtr->reconControl->numRUsComplete;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
				raidPtr->parity_rewrite_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->parity_rewrite_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->parity_rewrite_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->copyback_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->copyback_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
		wakeup(&rf_sparet_wait_queue);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		wakeup(&rf_sparet_resp_queue);
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return (error);

	switch (cmd) {
	case DIOCGDINFO:
		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
		break;
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
		newlabel = *(rs->sc_dkdev.dk_label);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGPART:
		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
		((struct partinfo *) data)->part =
		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;
#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		if ((error = raidlock(rs)) != 0)
			return (error);

		rs->sc_flags |= RAIDF_LABELLING;

		error = setdisklabel(rs->sc_dkdev.dk_label,
		    lp, 0, rs->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			/* only the "write" variants push the label to disk */
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(RAIDLABELDEV(dev),
				    raidstrategy, rs->sc_dkdev.dk_label,
				    rs->sc_dkdev.dk_cpulabel);
		}
		rs->sc_flags &= ~RAIDF_LABELLING;

		raidunlock(rs);

		if (error)
			return (error);
		break;
	}

	case DIOCWLABEL:
		if (*(int *) data != 0)
			rs->sc_flags |= RAIDF_WLABEL;
		else
			rs->sc_flags &= ~RAIDF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCAWEDGE:
	case DIOCDWEDGE:
		dkw = (void *)data;

		/* If the ioctl happens here, the parent is us. */
		(void)strcpy(dkw->dkw_parent, rs->sc_xname);
		return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);

	case DIOCLWEDGES:
		return dkwedge_list(&rs->sc_dkdev,
		    (struct dkwedge_list *)data, l);
	case DIOCCACHESYNC:
		return rf_sync_component_caches(raidPtr);
	default:
		retcode = ENOTTY;
	}
	return (retcode);

}


/* raidinit -- complete the rest of the initialization for the
   RAIDframe device.  Attaches the pseudo-device, initializes the
   disk(9) structures, and discovers wedges.  On attach failure the
   RAIDF_INITED flag is cleared again and the cfdata is freed. */


static void
raidinit(RF_Raid_t *raidPtr)
{
	cfdata_t cf;
	struct raid_softc *rs;
	int     unit;

	unit = raidPtr->raidid;

	rs = &raid_softc[unit];

	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	rs->sc_dev = config_attach_pseudo(cf);

	if (rs->sc_dev == NULL) {
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		rs->sc_flags &= ~RAIDF_INITED;
		free(cf, M_RAIDFRAME);
		return;
	}

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
	disk_attach(&rs->sc_dkdev);
	disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	dkwedge_discover(&rs->sc_dkdev);

	rf_set_properties(rs, raidPtr);

}
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * XXX This code is not currently used.
GO 1958 */ 1959 int 1960 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req) 1961 { 1962 int retcode; 1963 1964 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1965 req->next = rf_sparet_wait_queue; 1966 rf_sparet_wait_queue = req; 1967 wakeup(&rf_sparet_wait_queue); 1968 1969 /* mpsleep unlocks the mutex */ 1970 while (!rf_sparet_resp_queue) { 1971 tsleep(&rf_sparet_resp_queue, PRIBIO, 1972 "raidframe getsparetable", 0); 1973 } 1974 req = rf_sparet_resp_queue; 1975 rf_sparet_resp_queue = req->next; 1976 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1977 1978 retcode = req->fcol; 1979 RF_Free(req, sizeof(*req)); /* this is not the same req as we 1980 * alloc'd */ 1981 return (retcode); 1982 } 1983 #endif 1984 1985 /* a wrapper around rf_DoAccess that extracts appropriate info from the 1986 * bp & passes it down. 1987 * any calls originating in the kernel must use non-blocking I/O 1988 * do some extra sanity checking to return "appropriate" error values for 1989 * certain conditions (to make some standard utilities work) 1990 * 1991 * Formerly known as: rf_DoAccessKernel 1992 */ 1993 void 1994 raidstart(RF_Raid_t *raidPtr) 1995 { 1996 RF_SectorCount_t num_blocks, pb, sum; 1997 RF_RaidAddr_t raid_addr; 1998 struct partition *pp; 1999 daddr_t blocknum; 2000 int unit; 2001 struct raid_softc *rs; 2002 int do_async; 2003 struct buf *bp; 2004 int rc; 2005 2006 unit = raidPtr->raidid; 2007 rs = &raid_softc[unit]; 2008 2009 /* quick check to see if anything has died recently */ 2010 RF_LOCK_MUTEX(raidPtr->mutex); 2011 if (raidPtr->numNewFailures > 0) { 2012 RF_UNLOCK_MUTEX(raidPtr->mutex); 2013 rf_update_component_labels(raidPtr, 2014 RF_NORMAL_COMPONENT_UPDATE); 2015 RF_LOCK_MUTEX(raidPtr->mutex); 2016 raidPtr->numNewFailures--; 2017 } 2018 2019 /* Check to see if we're at the limit... 
*/ 2020 while (raidPtr->openings > 0) { 2021 RF_UNLOCK_MUTEX(raidPtr->mutex); 2022 2023 /* get the next item, if any, from the queue */ 2024 if ((bp = bufq_get(rs->buf_queue)) == NULL) { 2025 /* nothing more to do */ 2026 return; 2027 } 2028 2029 /* Ok, for the bp we have here, bp->b_blkno is relative to the 2030 * partition.. Need to make it absolute to the underlying 2031 * device.. */ 2032 2033 blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector; 2034 if (DISKPART(bp->b_dev) != RAW_PART) { 2035 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; 2036 blocknum += pp->p_offset; 2037 } 2038 2039 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, 2040 (int) blocknum)); 2041 2042 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount)); 2043 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid)); 2044 2045 /* *THIS* is where we adjust what block we're going to... 2046 * but DO NOT TOUCH bp->b_blkno!!! */ 2047 raid_addr = blocknum; 2048 2049 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector; 2050 pb = (bp->b_bcount & raidPtr->sectorMask) ? 
1 : 0; 2051 sum = raid_addr + num_blocks + pb; 2052 if (1 || rf_debugKernelAccess) { 2053 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n", 2054 (int) raid_addr, (int) sum, (int) num_blocks, 2055 (int) pb, (int) bp->b_resid)); 2056 } 2057 if ((sum > raidPtr->totalSectors) || (sum < raid_addr) 2058 || (sum < num_blocks) || (sum < pb)) { 2059 bp->b_error = ENOSPC; 2060 bp->b_resid = bp->b_bcount; 2061 biodone(bp); 2062 RF_LOCK_MUTEX(raidPtr->mutex); 2063 continue; 2064 } 2065 /* 2066 * XXX rf_DoAccess() should do this, not just DoAccessKernel() 2067 */ 2068 2069 if (bp->b_bcount & raidPtr->sectorMask) { 2070 bp->b_error = EINVAL; 2071 bp->b_resid = bp->b_bcount; 2072 biodone(bp); 2073 RF_LOCK_MUTEX(raidPtr->mutex); 2074 continue; 2075 2076 } 2077 db1_printf(("Calling DoAccess..\n")); 2078 2079 2080 RF_LOCK_MUTEX(raidPtr->mutex); 2081 raidPtr->openings--; 2082 RF_UNLOCK_MUTEX(raidPtr->mutex); 2083 2084 /* 2085 * Everything is async. 2086 */ 2087 do_async = 1; 2088 2089 disk_busy(&rs->sc_dkdev); 2090 2091 /* XXX we're still at splbio() here... do we *really* 2092 need to be? */ 2093 2094 /* don't ever condition on bp->b_flags & B_WRITE. 2095 * always condition on B_READ instead */ 2096 2097 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ? 2098 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, 2099 do_async, raid_addr, num_blocks, 2100 bp->b_data, bp, RF_DAG_NONBLOCKING_IO); 2101 2102 if (rc) { 2103 bp->b_error = rc; 2104 bp->b_resid = bp->b_bcount; 2105 biodone(bp); 2106 /* continue loop */ 2107 } 2108 2109 RF_LOCK_MUTEX(raidPtr->mutex); 2110 } 2111 RF_UNLOCK_MUTEX(raidPtr->mutex); 2112 } 2113 2114 2115 2116 2117 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */ 2118 2119 int 2120 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req) 2121 { 2122 int op = (req->type == RF_IO_TYPE_READ) ? 
B_READ : B_WRITE; 2123 struct buf *bp; 2124 2125 req->queue = queue; 2126 bp = req->bp; 2127 2128 switch (req->type) { 2129 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */ 2130 /* XXX need to do something extra here.. */ 2131 /* I'm leaving this in, as I've never actually seen it used, 2132 * and I'd like folks to report it... GO */ 2133 printf(("WAKEUP CALLED\n")); 2134 queue->numOutstanding++; 2135 2136 bp->b_flags = 0; 2137 bp->b_private = req; 2138 2139 KernelWakeupFunc(bp); 2140 break; 2141 2142 case RF_IO_TYPE_READ: 2143 case RF_IO_TYPE_WRITE: 2144 #if RF_ACC_TRACE > 0 2145 if (req->tracerec) { 2146 RF_ETIMER_START(req->tracerec->timer); 2147 } 2148 #endif 2149 InitBP(bp, queue->rf_cinfo->ci_vp, 2150 op, queue->rf_cinfo->ci_dev, 2151 req->sectorOffset, req->numSector, 2152 req->buf, KernelWakeupFunc, (void *) req, 2153 queue->raidPtr->logBytesPerSector, req->b_proc); 2154 2155 if (rf_debugKernelAccess) { 2156 db1_printf(("dispatch: bp->b_blkno = %ld\n", 2157 (long) bp->b_blkno)); 2158 } 2159 queue->numOutstanding++; 2160 queue->last_deq_sector = req->sectorOffset; 2161 /* acc wouldn't have been let in if there were any pending 2162 * reqs at any other priority */ 2163 queue->curPriority = req->priority; 2164 2165 db1_printf(("Going for %c to unit %d col %d\n", 2166 req->type, queue->raidPtr->raidid, 2167 queue->col)); 2168 db1_printf(("sector %d count %d (%d bytes) %d\n", 2169 (int) req->sectorOffset, (int) req->numSector, 2170 (int) (req->numSector << 2171 queue->raidPtr->logBytesPerSector), 2172 (int) queue->raidPtr->logBytesPerSector)); 2173 2174 /* 2175 * XXX: drop lock here since this can block at 2176 * least with backing SCSI devices. Retake it 2177 * to minimize fuss with calling interfaces. 
2178 */ 2179 2180 RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam"); 2181 bdev_strategy(bp); 2182 RF_LOCK_QUEUE_MUTEX(queue, "unusedparam"); 2183 break; 2184 2185 default: 2186 panic("bad req->type in rf_DispatchKernelIO"); 2187 } 2188 db1_printf(("Exiting from DispatchKernelIO\n")); 2189 2190 return (0); 2191 } 2192 /* this is the callback function associated with a I/O invoked from 2193 kernel code. 2194 */ 2195 static void 2196 KernelWakeupFunc(struct buf *bp) 2197 { 2198 RF_DiskQueueData_t *req = NULL; 2199 RF_DiskQueue_t *queue; 2200 int s; 2201 2202 s = splbio(); 2203 db1_printf(("recovering the request queue:\n")); 2204 req = bp->b_private; 2205 2206 queue = (RF_DiskQueue_t *) req->queue; 2207 2208 #if RF_ACC_TRACE > 0 2209 if (req->tracerec) { 2210 RF_ETIMER_STOP(req->tracerec->timer); 2211 RF_ETIMER_EVAL(req->tracerec->timer); 2212 RF_LOCK_MUTEX(rf_tracing_mutex); 2213 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer); 2214 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer); 2215 req->tracerec->num_phys_ios++; 2216 RF_UNLOCK_MUTEX(rf_tracing_mutex); 2217 } 2218 #endif 2219 2220 /* XXX Ok, let's get aggressive... If b_error is set, let's go 2221 * ballistic, and mark the component as hosed... */ 2222 2223 if (bp->b_error != 0) { 2224 /* Mark the disk as dead */ 2225 /* but only mark it once... */ 2226 /* and only if it wouldn't leave this RAID set 2227 completely broken */ 2228 if (((queue->raidPtr->Disks[queue->col].status == 2229 rf_ds_optimal) || 2230 (queue->raidPtr->Disks[queue->col].status == 2231 rf_ds_used_spare)) && 2232 (queue->raidPtr->numFailures < 2233 queue->raidPtr->Layout.map->faultsTolerated)) { 2234 printf("raid%d: IO Error. 
Marking %s as failed.\n", 2235 queue->raidPtr->raidid, 2236 queue->raidPtr->Disks[queue->col].devname); 2237 queue->raidPtr->Disks[queue->col].status = 2238 rf_ds_failed; 2239 queue->raidPtr->status = rf_rs_degraded; 2240 queue->raidPtr->numFailures++; 2241 queue->raidPtr->numNewFailures++; 2242 } else { /* Disk is already dead... */ 2243 /* printf("Disk already marked as dead!\n"); */ 2244 } 2245 2246 } 2247 2248 /* Fill in the error value */ 2249 2250 req->error = bp->b_error; 2251 2252 simple_lock(&queue->raidPtr->iodone_lock); 2253 2254 /* Drop this one on the "finished" queue... */ 2255 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries); 2256 2257 /* Let the raidio thread know there is work to be done. */ 2258 wakeup(&(queue->raidPtr->iodone)); 2259 2260 simple_unlock(&queue->raidPtr->iodone_lock); 2261 2262 splx(s); 2263 } 2264 2265 2266 2267 /* 2268 * initialize a buf structure for doing an I/O in the kernel. 2269 */ 2270 static void 2271 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev, 2272 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf, 2273 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector, 2274 struct proc *b_proc) 2275 { 2276 /* bp->b_flags = B_PHYS | rw_flag; */ 2277 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */ 2278 bp->b_oflags = 0; 2279 bp->b_cflags = 0; 2280 bp->b_bcount = numSect << logBytesPerSector; 2281 bp->b_bufsize = bp->b_bcount; 2282 bp->b_error = 0; 2283 bp->b_dev = dev; 2284 bp->b_data = bf; 2285 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT; 2286 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! 
					 */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}

/*
 * Fabricate a default disklabel for the RAID device from the array
 * geometry (the ntracks/ncylinders values are made up).
 */
static void
raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
		    struct disklabel *lp)
{
	memset(lp, 0, sizeof(*lp));

	/* fabricate a label... */
	lp->d_secperunit = raidPtr->totalSectors;
	lp->d_secsize = raidPtr->bytesPerSector;
	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	lp->d_ntracks = 4 * raidPtr->numCol;
	lp->d_ncylinders = raidPtr->totalSectors /
	    (lp->d_nsectors * lp->d_ntracks);
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
	lp->d_type = DTYPE_RAID;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	lp->d_partitions[RAW_PART].p_offset = 0;
	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	/* NOTE(review): checksums rs->sc_dkdev.dk_label rather than lp;
	   callers appear to pass dk_label as lp so the two coincide --
	   confirm before relying on this with a different lp. */
	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);

}
/*
 * Read the disklabel from the raid device.  If one is not present, fake one
 * up.
 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from the fabricated default, then try the on-disk one */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%" PRIu32 ") != "
			    "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
			    lp->d_secperunit, rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				       "exceeds the size of raid (%" PRIu64 ")\n",
				       unit, rs->sc_xname, 'a' + i, rs->sc_size);
		}
	}

}
/*
 * Take care of things one might want to take care of in the event
 * that a disklabel isn't present.
 */
static void
raidmakedisklabel(struct raid_softc *rs)
{
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	db1_printf(("Making a label..\n"));

	/*
	 * For historical reasons, if there's no disklabel present
	 * the raw partition must be marked FS_BSDFFS.
	 */

	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;

	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));

	lp->d_checksum = dkcksum(lp);
}
/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 * (Hmm...
   where have we seen this warning before :-> GO )
 */
static int
raidlock(struct raid_softc *rs)
{
	int error;

	/* Sleep until the current holder drops RAIDF_LOCKED; PCATCH makes
	   the wait interruptible by signals. */
	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
		rs->sc_flags |= RAIDF_WANTED;
		if ((error =
			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
			return (error);
	}
	rs->sc_flags |= RAIDF_LOCKED;
	return (0);
}
/*
 * Unlock and wake up any waiters.
 */
static void
raidunlock(struct raid_softc *rs)
{

	rs->sc_flags &= ~RAIDF_LOCKED;
	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
		rs->sc_flags &= ~RAIDF_WANTED;
		wakeup(rs);
	}
}


#define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
#define RF_PARITY_MAP_SIZE   RF_PARITYMAP_NBYTE

/* Byte offset of the component label area on each component. */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}

/* Size of the component info area: at least one sector. */
static daddr_t
rf_component_info_size(unsigned secsize)
{
	daddr_t info_size;

	KASSERT(secsize);
	if (secsize > RF_COMPONENT_INFO_SIZE)
		info_size = secsize;
	else
		info_size = RF_COMPONENT_INFO_SIZE;

	return info_size;
}

/* Byte offset of the parity map area: just past the component info. */
static daddr_t
rf_parity_map_offset(RF_Raid_t *raidPtr)
{
	daddr_t map_offset;

	KASSERT(raidPtr->bytesPerSector);
	if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
		map_offset = raidPtr->bytesPerSector;
	else
		map_offset = RF_COMPONENT_INFO_SIZE;
	map_offset += rf_component_info_offset();

	return map_offset;
}

/* Size of the parity map area: at least one sector. */
static daddr_t
rf_parity_map_size(RF_Raid_t *raidPtr)
{
	daddr_t map_size;

	if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
		map_size = raidPtr->bytesPerSector;
	else
		map_size = RF_PARITY_MAP_SIZE;

	return map_size;
}

/* Mark column `col' clean in its on-disk component label. */
int
raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *clabel;

	clabel = raidget_component_label(raidPtr, col);
	clabel->clean = RF_RAID_CLEAN;
	raidflush_component_label(raidPtr, col);
	return(0);
}


/* Mark column `col' dirty in its on-disk component label. */
int
raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *clabel;

	clabel = raidget_component_label(raidPtr, col);
	clabel->clean = RF_RAID_DIRTY;
	raidflush_component_label(raidPtr, col);
	return(0);
}

/* Read the component label for `col' from disk into the in-core copy. */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}

/* Return a pointer to the in-core component label for `col'. */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}

/* Write the in-core component label for `col' back to disk, stamping
   the array's current mod_counter into it first. */
int
raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *label;

	label = &raidPtr->raid_cinfo[col].ci_label;
	label->mod_counter = raidPtr->mod_counter;
#ifndef RF_NO_PARITY_MAP
	label->parity_map_modcount = label->mod_counter;
#endif
	return raidwrite_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp, label);
}


/* Read a component label from the component-info area of (dev, b_vp). */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}

/* ARGSUSED */
static int
raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize)
{
	struct buf *bp;
	const struct bdevsw *bdev;
	int error;

	/* XXX should probably ensure that we don't try to do
this if 2568 someone has changed rf_protected_sectors. */ 2569 2570 if (b_vp == NULL) { 2571 /* For whatever reason, this component is not valid. 2572 Don't try to read a component label from it. */ 2573 return(EINVAL); 2574 } 2575 2576 /* get a block of the appropriate size... */ 2577 bp = geteblk((int)dsize); 2578 bp->b_dev = dev; 2579 2580 /* get our ducks in a row for the read */ 2581 bp->b_blkno = offset / DEV_BSIZE; 2582 bp->b_bcount = dsize; 2583 bp->b_flags |= B_READ; 2584 bp->b_resid = dsize; 2585 2586 bdev = bdevsw_lookup(bp->b_dev); 2587 if (bdev == NULL) 2588 return (ENXIO); 2589 (*bdev->d_strategy)(bp); 2590 2591 error = biowait(bp); 2592 2593 if (!error) { 2594 memcpy(data, bp->b_data, msize); 2595 } 2596 2597 brelse(bp, 0); 2598 return(error); 2599 } 2600 2601 2602 static int 2603 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp, 2604 RF_ComponentLabel_t *clabel) 2605 { 2606 return raidwrite_component_area(dev, b_vp, clabel, 2607 sizeof(RF_ComponentLabel_t), 2608 rf_component_info_offset(), 2609 rf_component_info_size(secsize), 0); 2610 } 2611 2612 /* ARGSUSED */ 2613 static int 2614 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data, 2615 size_t msize, daddr_t offset, daddr_t dsize, int asyncp) 2616 { 2617 struct buf *bp; 2618 const struct bdevsw *bdev; 2619 int error; 2620 2621 /* get a block of the appropriate size... */ 2622 bp = geteblk((int)dsize); 2623 bp->b_dev = dev; 2624 2625 /* get our ducks in a row for the write */ 2626 bp->b_blkno = offset / DEV_BSIZE; 2627 bp->b_bcount = dsize; 2628 bp->b_flags |= B_WRITE | (asyncp ? 
B_ASYNC : 0); 2629 bp->b_resid = dsize; 2630 2631 memset(bp->b_data, 0, dsize); 2632 memcpy(bp->b_data, data, msize); 2633 2634 bdev = bdevsw_lookup(bp->b_dev); 2635 if (bdev == NULL) 2636 return (ENXIO); 2637 (*bdev->d_strategy)(bp); 2638 if (asyncp) 2639 return 0; 2640 error = biowait(bp); 2641 brelse(bp, 0); 2642 if (error) { 2643 #if 1 2644 printf("Failed to write RAID component info!\n"); 2645 #endif 2646 } 2647 2648 return(error); 2649 } 2650 2651 void 2652 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map) 2653 { 2654 int c; 2655 2656 for (c = 0; c < raidPtr->numCol; c++) { 2657 /* Skip dead disks. */ 2658 if (RF_DEAD_DISK(raidPtr->Disks[c].status)) 2659 continue; 2660 /* XXXjld: what if an error occurs here? */ 2661 raidwrite_component_area(raidPtr->Disks[c].dev, 2662 raidPtr->raid_cinfo[c].ci_vp, map, 2663 RF_PARITYMAP_NBYTE, 2664 rf_parity_map_offset(raidPtr), 2665 rf_parity_map_size(raidPtr), 0); 2666 } 2667 } 2668 2669 void 2670 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map) 2671 { 2672 struct rf_paritymap_ondisk tmp; 2673 int c,first; 2674 2675 first=1; 2676 for (c = 0; c < raidPtr->numCol; c++) { 2677 /* Skip dead disks. 
		 */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		raidread_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr));
		if (first) {
			/* first live component seeds the result... */
			memcpy(map, &tmp, sizeof(*map));
			first = 0;
		} else {
			/* ...the rest are merged in */
			rf_paritymap_merge(map, &tmp);
		}
	}
}

/* Bump the mod counter and mark every non-failed component (and every
   in-use spare) dirty in its on-disk label. */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}


/* Refresh the component labels of all optimal components and in-use
   spares; on the final update with clean parity, also mark them clean. */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find which column this spare replaced */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}

/* Close one component's vnode: auto-configured components are closed
   via VOP_CLOSE()+vput(), others via vn_close(). */
void
rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
{

	if (vp != NULL) {
		if (auto_configured == 1) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

		} else {
			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
		}
	}
}


/* Close and forget the vnodes of all components and spares. */
void
rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
{
	int r,c;
	struct vnode *vp;
	int acd;


	/* We take this opportunity to close the vnodes like we should..
	 */

	for (c = 0; c < raidPtr->numCol; c++) {
		vp = raidPtr->raid_cinfo[c].ci_vp;
		acd = raidPtr->Disks[c].auto_configured;
		rf_close_component(raidPtr, vp, acd);
		raidPtr->raid_cinfo[c].ci_vp = NULL;
		raidPtr->Disks[c].auto_configured = 0;
	}

	for (r = 0; r < raidPtr->numSpare; r++) {
		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
		rf_close_component(raidPtr, vp, acd);
		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
	}
}


/* Kernel thread: fail a disk (optionally reconstructing to a spare),
   free `req', and exit. */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}

/* Kernel thread: rewrite all parity; on success mark parity good. */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all...
	 */
	kthread_exit(0);	/* does not return */
}


/* Kernel thread: copy reconstructed data back to the replaced disk. */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}


/* Kernel thread: rebuild a component in place (no spare), free `req',
   and exit. */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}

/* Try to read a component label from (dev, vp); if it looks like a
   RAIDframe component, prepend an entry to `ac_list', otherwise close
   the vnode.  On out-of-memory the whole list is freed and NULL is
   returned. */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* free everything accumulated so far */
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel) &&
		    (clabel->partitionSize <= size)) {
			rf_fix_old_label_size(clabel, numsecs);
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it.
*/ 2997 ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME, 2998 M_NOWAIT); 2999 if (ac == NULL) { 3000 free(clabel, M_RAIDFRAME); 3001 goto oomem; 3002 } 3003 strlcpy(ac->devname, cname, sizeof(ac->devname)); 3004 ac->dev = dev; 3005 ac->vp = vp; 3006 ac->clabel = clabel; 3007 ac->next = ac_list; 3008 ac_list = ac; 3009 good_one = 1; 3010 } 3011 } 3012 if (!good_one) { 3013 /* cleanup */ 3014 free(clabel, M_RAIDFRAME); 3015 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3016 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3017 vput(vp); 3018 } 3019 return ac_list; 3020 } 3021 3022 RF_AutoConfig_t * 3023 rf_find_raid_components(void) 3024 { 3025 struct vnode *vp; 3026 struct disklabel label; 3027 device_t dv; 3028 deviter_t di; 3029 dev_t dev; 3030 int bmajor, bminor, wedge; 3031 int error; 3032 int i; 3033 RF_AutoConfig_t *ac_list; 3034 uint64_t numsecs; 3035 unsigned secsize; 3036 3037 RF_ASSERT(raidPtr->bytesPerSector < rf_component_info_offset()); 3038 3039 /* initialize the AutoConfig list */ 3040 ac_list = NULL; 3041 3042 /* we begin by trolling through *all* the devices on the system */ 3043 3044 for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL; 3045 dv = deviter_next(&di)) { 3046 3047 /* we are only interested in disks... */ 3048 if (device_class(dv) != DV_DISK) 3049 continue; 3050 3051 /* we don't care about floppies... */ 3052 if (device_is_a(dv, "fd")) { 3053 continue; 3054 } 3055 3056 /* we don't care about CD's... */ 3057 if (device_is_a(dv, "cd")) { 3058 continue; 3059 } 3060 3061 /* we don't care about md's... 
*/ 3062 if (device_is_a(dv, "md")) { 3063 continue; 3064 } 3065 3066 /* hdfd is the Atari/Hades floppy driver */ 3067 if (device_is_a(dv, "hdfd")) { 3068 continue; 3069 } 3070 3071 /* fdisa is the Atari/Milan floppy driver */ 3072 if (device_is_a(dv, "fdisa")) { 3073 continue; 3074 } 3075 3076 /* need to find the device_name_to_block_device_major stuff */ 3077 bmajor = devsw_name2blk(device_xname(dv), NULL, 0); 3078 3079 /* get a vnode for the raw partition of this disk */ 3080 3081 wedge = device_is_a(dv, "dk"); 3082 bminor = minor(device_unit(dv)); 3083 dev = wedge ? makedev(bmajor, bminor) : 3084 MAKEDISKDEV(bmajor, bminor, RAW_PART); 3085 if (bdevvp(dev, &vp)) 3086 panic("RAID can't alloc vnode"); 3087 3088 error = VOP_OPEN(vp, FREAD, NOCRED); 3089 3090 if (error) { 3091 /* "Who cares." Continue looking 3092 for something that exists*/ 3093 vput(vp); 3094 continue; 3095 } 3096 3097 error = getdisksize(vp, &numsecs, &secsize); 3098 if (error) { 3099 vput(vp); 3100 continue; 3101 } 3102 if (wedge) { 3103 struct dkwedge_info dkw; 3104 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, 3105 NOCRED); 3106 if (error) { 3107 printf("RAIDframe: can't get wedge info for " 3108 "dev %s (%d)\n", device_xname(dv), error); 3109 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3110 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3111 vput(vp); 3112 continue; 3113 } 3114 3115 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) { 3116 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3117 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3118 vput(vp); 3119 continue; 3120 } 3121 3122 ac_list = rf_get_component(ac_list, dev, vp, 3123 device_xname(dv), dkw.dkw_size, numsecs, secsize); 3124 continue; 3125 } 3126 3127 /* Ok, the disk exists. Go get the disklabel. 
*/ 3128 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED); 3129 if (error) { 3130 /* 3131 * XXX can't happen - open() would 3132 * have errored out (or faked up one) 3133 */ 3134 if (error != ENOTTY) 3135 printf("RAIDframe: can't get label for dev " 3136 "%s (%d)\n", device_xname(dv), error); 3137 } 3138 3139 /* don't need this any more. We'll allocate it again 3140 a little later if we really do... */ 3141 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3142 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3143 vput(vp); 3144 3145 if (error) 3146 continue; 3147 3148 for (i = 0; i < label.d_npartitions; i++) { 3149 char cname[sizeof(ac_list->devname)]; 3150 3151 /* We only support partitions marked as RAID */ 3152 if (label.d_partitions[i].p_fstype != FS_RAID) 3153 continue; 3154 3155 dev = MAKEDISKDEV(bmajor, device_unit(dv), i); 3156 if (bdevvp(dev, &vp)) 3157 panic("RAID can't alloc vnode"); 3158 3159 error = VOP_OPEN(vp, FREAD, NOCRED); 3160 if (error) { 3161 /* Whatever... */ 3162 vput(vp); 3163 continue; 3164 } 3165 snprintf(cname, sizeof(cname), "%s%c", 3166 device_xname(dv), 'a' + i); 3167 ac_list = rf_get_component(ac_list, dev, vp, cname, 3168 label.d_partitions[i].p_size, numsecs, secsize); 3169 } 3170 } 3171 deviter_release(&di); 3172 return ac_list; 3173 } 3174 3175 3176 static int 3177 rf_reasonable_label(RF_ComponentLabel_t *clabel) 3178 { 3179 3180 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || 3181 (clabel->version==RF_COMPONENT_LABEL_VERSION)) && 3182 ((clabel->clean == RF_RAID_CLEAN) || 3183 (clabel->clean == RF_RAID_DIRTY)) && 3184 clabel->row >=0 && 3185 clabel->column >= 0 && 3186 clabel->num_rows > 0 && 3187 clabel->num_columns > 0 && 3188 clabel->row < clabel->num_rows && 3189 clabel->column < clabel->num_columns && 3190 clabel->blockSize > 0 && 3191 clabel->numBlocks > 0) { 3192 /* label looks reasonable enough... 
		 */
		return(1);
	}
	return(0);
}


/*
 * For reasons yet unknown, some old component labels have garbage in
 * the newer numBlocksHi region, and this causes lossage.  Since those
 * disks will also have numsecs set to less than 32 bits of sectors,
 * we can determine when this corruption has occured, and fix it.
 */
static void
rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
{

	if (clabel->numBlocksHi && numsecs < ((uint64_t)1 << 32)) {
		printf("WARNING: total sectors < 32 bits, yet numBlocksHi set\n"
		       "WARNING: resetting numBlocksHi to zero.\n");
		clabel->numBlocksHi = 0;
	}
}


#ifdef DEBUG
/* Dump a component label to the console (debug builds only). */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks = clabel->numBlocks;

	/* reassemble the 64-bit block count from its two label halves */
	numBlocks |= (uint64_t)clabel->numBlocksHi << 32;

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	    clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n",
	    (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Contains root partition: %s\n",
	    clabel->root_partition ? "Yes" : "No");
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif

/* Partition the autoconfig list into sets of components that belong to
   the same RAID configuration (as judged by rf_does_it_fit()). */
RF_ConfigSet_t *
rf_create_auto_sets(RF_AutoConfig_t *ac_list)
{
	RF_AutoConfig_t *ac;
	RF_ConfigSet_t *config_sets;
	RF_ConfigSet_t *cset;
	RF_AutoConfig_t *ac_next;


	config_sets = NULL;

	/* Go through the AutoConfig list, and figure out which components
	   belong to what sets. */
	ac = ac_list;
	while(ac!=NULL) {
		/* we're going to putz with ac->next, so save it here
		   for use at the end of the loop */
		ac_next = ac->next;

		if (config_sets == NULL) {
			/* will need at least this one... */
			config_sets = (RF_ConfigSet_t *)
				malloc(sizeof(RF_ConfigSet_t),
				       M_RAIDFRAME, M_NOWAIT);
			if (config_sets == NULL) {
				panic("rf_create_auto_sets: No memory!");
			}
			/* this one is easy :) */
			config_sets->ac = ac;
			config_sets->next = NULL;
			config_sets->rootable = 0;
			ac->next = NULL;
		} else {
			/* which set does this component fit into? */
			cset = config_sets;
			while(cset!=NULL) {
				if (rf_does_it_fit(cset, ac)) {
					/* looks like it matches... */
					ac->next = cset->ac;
					cset->ac = ac;
					break;
				}
				cset = cset->next;
			}
			if (cset==NULL) {
				/* didn't find a match above...
new set..*/ 3294 cset = (RF_ConfigSet_t *) 3295 malloc(sizeof(RF_ConfigSet_t), 3296 M_RAIDFRAME, M_NOWAIT); 3297 if (cset == NULL) { 3298 panic("rf_create_auto_sets: No memory!"); 3299 } 3300 cset->ac = ac; 3301 ac->next = NULL; 3302 cset->next = config_sets; 3303 cset->rootable = 0; 3304 config_sets = cset; 3305 } 3306 } 3307 ac = ac_next; 3308 } 3309 3310 3311 return(config_sets); 3312 } 3313 3314 static int 3315 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac) 3316 { 3317 RF_ComponentLabel_t *clabel1, *clabel2; 3318 3319 /* If this one matches the *first* one in the set, that's good 3320 enough, since the other members of the set would have been 3321 through here too... */ 3322 /* note that we are not checking partitionSize here.. 3323 3324 Note that we are also not checking the mod_counters here. 3325 If everything else matches execpt the mod_counter, that's 3326 good enough for this test. We will deal with the mod_counters 3327 a little later in the autoconfiguration process. 3328 3329 (clabel1->mod_counter == clabel2->mod_counter) && 3330 3331 The reason we don't check for this is that failed disks 3332 will have lower modification counts. If those disks are 3333 not added to the set they used to belong to, then they will 3334 form their own set, which may result in 2 different sets, 3335 for example, competing to be configured at raid0, and 3336 perhaps competing to be the root filesystem set. If the 3337 wrong ones get configured, or both attempt to become /, 3338 weird behaviour and or serious lossage will occur. Thus we 3339 need to bring them into the fold here, and kick them out at 3340 a later point. 
3341 3342 */ 3343 3344 clabel1 = cset->ac->clabel; 3345 clabel2 = ac->clabel; 3346 if ((clabel1->version == clabel2->version) && 3347 (clabel1->serial_number == clabel2->serial_number) && 3348 (clabel1->num_rows == clabel2->num_rows) && 3349 (clabel1->num_columns == clabel2->num_columns) && 3350 (clabel1->sectPerSU == clabel2->sectPerSU) && 3351 (clabel1->SUsPerPU == clabel2->SUsPerPU) && 3352 (clabel1->SUsPerRU == clabel2->SUsPerRU) && 3353 (clabel1->parityConfig == clabel2->parityConfig) && 3354 (clabel1->maxOutstanding == clabel2->maxOutstanding) && 3355 (clabel1->blockSize == clabel2->blockSize) && 3356 (clabel1->numBlocks == clabel2->numBlocks) && 3357 (clabel1->numBlocksHi == clabel2->numBlocksHi) && 3358 (clabel1->autoconfigure == clabel2->autoconfigure) && 3359 (clabel1->root_partition == clabel2->root_partition) && 3360 (clabel1->last_unit == clabel2->last_unit) && 3361 (clabel1->config_order == clabel2->config_order)) { 3362 /* if it get's here, it almost *has* to be a match */ 3363 } else { 3364 /* it's not consistent with somebody in the set.. 3365 punt */ 3366 return(0); 3367 } 3368 /* all was fine.. it must fit... */ 3369 return(1); 3370 } 3371 3372 int 3373 rf_have_enough_components(RF_ConfigSet_t *cset) 3374 { 3375 RF_AutoConfig_t *ac; 3376 RF_AutoConfig_t *auto_config; 3377 RF_ComponentLabel_t *clabel; 3378 int c; 3379 int num_cols; 3380 int num_missing; 3381 int mod_counter; 3382 int mod_counter_found; 3383 int even_pair_failed; 3384 char parity_type; 3385 3386 3387 /* check to see that we have enough 'live' components 3388 of this set. If so, we can configure it if necessary */ 3389 3390 num_cols = cset->ac->clabel->num_columns; 3391 parity_type = cset->ac->clabel->parityConfig; 3392 3393 /* XXX Check for duplicate components!?!?!? */ 3394 3395 /* Determine what the mod_counter is supposed to be for this set. 
	*/

	/* the set's authoritative mod_counter is the maximum found
	   over all its members */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* for each column, look for a component at that column whose
	   mod_counter matches the set's (i.e. it is up to date) */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component. If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an even component, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* tolerance by parity level: RAID 0 survives no losses;
	   RAID 4/5 survive exactly one */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}

/*
 * Build an RF_Config_t for the set whose component list begins at 'ac',
 * using the component label of the set's first member as the template.
 * Note the side effect: num_rows is forced to 1 in that label as well.
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
			RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numRow = clabel->num_rows = 1;
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ?? */

	/* 2. Fill in the device name for each column */
	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	/* 3. No debug variables for an autoconfigured set */
	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}

/*
 * Record 'new_value' as the array's autoconfigure flag and flush it into
 * the component label of every optimal component and every used spare.
 * Returns new_value.
 */
int
rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
{
	RF_ComponentLabel_t *clabel;
	int column;
	int sparecol;

	raidPtr->autoconfigure = new_value;

	for(column=0; column<raidPtr->numCol; column++) {
		if (raidPtr->Disks[column].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->autoconfigure = new_value;
			raidflush_component_label(raidPtr, column);
		}
	}
	for(column = 0; column < raidPtr->numSpare ; column++) {
		sparecol = raidPtr->numCol + column;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			clabel = raidget_component_label(raidPtr, sparecol);
			clabel->autoconfigure = new_value;
			raidflush_component_label(raidPtr, sparecol);
		}
	}
	return(new_value);
}

/*
 * Record 'new_value' as the array's root_partition flag and flush it
 * into the component label of every optimal component and every used
 * spare (same pattern as rf_set_autoconfig()).  Returns new_value.
 */
int
rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
{
	RF_ComponentLabel_t *clabel;
	int column;
	int sparecol;

	raidPtr->root_partition = new_value;
	for(column=0; column<raidPtr->numCol; column++) {
		if (raidPtr->Disks[column].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->root_partition = new_value;
			raidflush_component_label(raidPtr, column);
		}
	}
	for(column = 0; column < raidPtr->numSpare ; column++) {
		sparecol = raidPtr->numCol + column;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			clabel = raidget_component_label(raidPtr, sparecol);
			clabel->root_partition = new_value;
			raidflush_component_label(raidPtr, sparecol);
		}
	}
	return(new_value);
}

/*
 * Close and release the vnode of every component on the set's list.
 */
void
rf_release_all_vps(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;

	ac = cset->ac;
	while(ac!=NULL) {
		/* Close the vp, and give it back */
		if (ac->vp) {
			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(ac->vp, FREAD, NOCRED);
			/* vput() unlocks and releases the reference */
			vput(ac->vp);
			ac->vp = NULL;
		}
		ac = ac->next;
	}
}


/*
 * Free every RF_AutoConfig_t on the set's list (and each one's
 * component label), then free the set itself.  Does NOT release
 * vnodes -- see rf_release_all_vps() for that.
 */
void
rf_cleanup_config_set(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *next_ac;

	ac = cset->ac;
	while(ac!=NULL) {
		next_ac = ac->next;
		/* nuke the label */
		free(ac->clabel, M_RAIDFRAME);
		/* cleanup the config structure */
		free(ac, M_RAIDFRAME);
		/* "next.." */
		ac = next_ac;
	}
	/* and, finally, nuke the config set */
	free(cset, M_RAIDFRAME);
}


/*
 * Populate a component label from the current state of the array.
 * The caller is responsible for writing the label out.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	clabel->numBlocks = raidPtr->sectorsPerDisk;
	/* upper 32 bits of the sector count.  NOTE(review): assumes
	   sectorsPerDisk is a 64-bit quantity; a >>32 on a 32-bit type
	   would be undefined behaviour -- confirm the field's type. */
	clabel->numBlocksHi = raidPtr->sectorsPerDisk >> 32;

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}

/*
 * Autoconfigure the RAID set 'cset': pick a unit number (preferring the
 * unit it was last configured as), build a config, and configure it.
 * On return *unit holds the chosen unit (or -1 if none was chosen).
 * Returns 0 on success, nonzero on failure.
 */
int
rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	int retcode;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	retcode = 0;
	*unit = -1;

	/* 1. Create a config structure */

	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
				       M_RAIDFRAME,
				       M_NOWAIT);
	if (config==NULL) {
		printf("Out of mem!?!?\n");
		/* XXX do something more intelligent here. */
		return(1);
	}

	memset(config, 0, sizeof(RF_Config_t));

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	if ((raidID < 0) || (raidID >= numraid)) {
		/* let's not wander off into lala land. */
		raidID = numraid - 1;
	}
	if (raidPtrs[raidID]->valid != 0) {

		/*
		   Nope... Go looking for an alternative...
		   Start high so we don't immediately use raid0 if that's
		   not taken.
		*/

		for(raidID = numraid - 1; raidID >= 0; raidID--) {
			if (raidPtrs[raidID]->valid == 0) {
				/* can use this one! */
				break;
			}
		}
	}

	if (raidID < 0) {
		/* punt... every unit is already in use */
		printf("Unable to auto configure this set!\n");
		printf("(Out of RAID devs!)\n");
		free(config, M_RAIDFRAME);
		return(1);
	}

#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = raidPtrs[raidID];

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	retcode = rf_Configure(raidPtr, config, cset->ac);

	if (retcode == 0) {

		raidinit(raidPtrs[raidID]);

		rf_markalldirty(raidPtrs[raidID]);
		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
		if (cset->ac->clabel->root_partition==1) {
			/* everything configured just fine.  Make a note
			   that this set is eligible to be root. */
			cset->rootable = 1;
			/* XXX do this here? */
			raidPtrs[raidID]->root_partition = 1;
		}
	}

	/* 5.
	   Cleanup */
	free(config, M_RAIDFRAME);

	*unit = raidID;
	return(retcode);
}

/*
 * Charge a completed I/O back to the disk statistics of the unit that
 * issued it (byte count actually transferred, and whether it was a read).
 */
void
rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
{
	struct buf *bp;

	bp = (struct buf *)desc->bp;
	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
		    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
}

/*
 * Initialize a pool at IPL_BIO, pre-allocate xmin items, and set the
 * low/high watermarks to xmin/xmax.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}

/*
 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
 * if there is IO pending and if that IO could possibly be done for a
 * given RAID set.  Returns 0 if IO is waiting and can be done, 1
 * otherwise.
 *
 */

int
rf_buf_queue_check(int raidid)
{
	if ((bufq_peek(raid_softc[raidid].buf_queue) != NULL) &&
	    raidPtrs[raidid]->openings > 0) {
		/* there is work to do */
		return 0;
	}
	/* default is nothing to do */
	return 1;
}

/*
 * Query the size/sector size of the device behind 'vp' and fill in the
 * RF_RaidDisk_t.  numBlocks is reduced by rf_protectedSectors (the area
 * reserved for the component label).  Returns 0 or an errno from
 * getdisksize().  'l' is unused here.
 */
int
rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
{
	uint64_t numsecs;
	unsigned secsize;
	int error;

	error = getdisksize(vp, &numsecs, &secsize);
	if (error == 0) {
		diskPtr->blockSize = secsize;
		diskPtr->numBlocks = numsecs - rf_protectedSectors;
		diskPtr->partitionSize = numsecs;
		return 0;
	}
	return error;
}

/* autoconf match: a raid pseudo-device always matches */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}

/* autoconf attach: nothing to do at attach time */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}


/*
 * autoconf detach: take the unit lock, detach the unit, release the
 * lock.  Returns 0 or an errno.
 */
static int
raid_detach(device_t self, int flags)
{
	int error;
	struct raid_softc *rs = &raid_softc[device_unit(self)];

	if ((error = raidlock(rs)) != 0)
		return (error);

	error = raid_detach_unlocked(rs);

	raidunlock(rs);

	return error;
}

/*
 * Publish a synthetic disk geometry for the unit via proplib; any
 * previously installed disk-info dictionary is released.  The
 * tracks-per-cylinder value (4 * numCol) is fabricated, not physical.
 */
static void
rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	prop_dictionary_t disk_info, odisk_info, geom;
	disk_info = prop_dictionary_create();
	geom = prop_dictionary_create();
	prop_dictionary_set_uint64(geom, "sectors-per-unit",
				   raidPtr->totalSectors);
	prop_dictionary_set_uint32(geom, "sector-size",
				   raidPtr->bytesPerSector);

	prop_dictionary_set_uint16(geom, "sectors-per-track",
				   raidPtr->Layout.dataSectorsPerStripe);
	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
				   4 * raidPtr->numCol);

	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
	    raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
	    (4 * raidPtr->numCol)));

	prop_dictionary_set(disk_info, "geometry", geom);
	prop_object_release(geom);
	prop_dictionary_set(device_properties(rs->sc_dev),
			    "disk-info", disk_info);
	odisk_info = rs->sc_dkdev.dk_info;
	rs->sc_dkdev.dk_info = disk_info;
	if (odisk_info)
		prop_object_release(odisk_info);
}

/*
 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
 * We end up returning whatever error was returned by the first cache flush
 * that fails.
 */

int
rf_sync_component_caches(RF_Raid_t *raidPtr)
{
	int c, sparecol;
	int e,error;
	int force = 1;

	error = 0;
	/* flush every optimal component; ENODEV (no cache / no support)
	   is not treated as a failure worth reporting */
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
				      &force, FWRITE, NOCRED);
			if (e) {
				if (e != ENODEV)
					printf("raid%d: cache flush to component %s failed.\n",
					       raidPtr->raidid, raidPtr->Disks[c].devname);
				if (error == 0) {
					error = e;
				}
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
				      DIOCCACHESYNC, &force, FWRITE, NOCRED);
			if (e) {
				if (e != ENODEV)
					printf("raid%d: cache flush to component %s failed.\n",
					       raidPtr->raidid, raidPtr->Disks[sparecol].devname);
				if (error == 0) {
					error = e;
				}
			}
		}
	}
	return error;
}