1 /* $NetBSD: rf_netbsdkintf.c,v 1.299 2013/02/18 19:42:54 oster Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Greg Oster; Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 
40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 66 * 67 * @(#)cd.c 8.2 (Berkeley) 11/16/93 68 */ 69 70 /* 71 * Copyright (c) 1995 Carnegie-Mellon University. 72 * All rights reserved. 
73 * 74 * Authors: Mark Holland, Jim Zelenka 75 * 76 * Permission to use, copy, modify and distribute this software and 77 * its documentation is hereby granted, provided that both the copyright 78 * notice and this permission notice appear in all copies of the 79 * software, derivative works or modified versions, and any portions 80 * thereof, and that both notices appear in supporting documentation. 81 * 82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 85 * 86 * Carnegie Mellon requests users of this software to return to 87 * 88 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 89 * School of Computer Science 90 * Carnegie Mellon University 91 * Pittsburgh PA 15213-3890 92 * 93 * any improvements or extensions that they make and grant Carnegie the 94 * rights to redistribute these changes. 95 */ 96 97 /*********************************************************** 98 * 99 * rf_kintf.c -- the kernel interface routines for RAIDframe 100 * 101 ***********************************************************/ 102 103 #include <sys/cdefs.h> 104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.299 2013/02/18 19:42:54 oster Exp $"); 105 106 #ifdef _KERNEL_OPT 107 #include "opt_compat_netbsd.h" 108 #include "opt_raid_autoconfig.h" 109 #include "raid.h" 110 #endif 111 112 #include <sys/param.h> 113 #include <sys/errno.h> 114 #include <sys/pool.h> 115 #include <sys/proc.h> 116 #include <sys/queue.h> 117 #include <sys/disk.h> 118 #include <sys/device.h> 119 #include <sys/stat.h> 120 #include <sys/ioctl.h> 121 #include <sys/fcntl.h> 122 #include <sys/systm.h> 123 #include <sys/vnode.h> 124 #include <sys/disklabel.h> 125 #include <sys/conf.h> 126 #include <sys/buf.h> 127 #include <sys/bufq.h> 128 #include <sys/reboot.h> 129 #include <sys/kauth.h> 130 131 #include <prop/proplib.h> 132 133 #include 
<dev/raidframe/raidframevar.h> 134 #include <dev/raidframe/raidframeio.h> 135 #include <dev/raidframe/rf_paritymap.h> 136 137 #include "rf_raid.h" 138 #include "rf_copyback.h" 139 #include "rf_dag.h" 140 #include "rf_dagflags.h" 141 #include "rf_desc.h" 142 #include "rf_diskqueue.h" 143 #include "rf_etimer.h" 144 #include "rf_general.h" 145 #include "rf_kintf.h" 146 #include "rf_options.h" 147 #include "rf_driver.h" 148 #include "rf_parityscan.h" 149 #include "rf_threadstuff.h" 150 151 #ifdef COMPAT_50 152 #include "rf_compat50.h" 153 #endif 154 155 #ifdef DEBUG 156 int rf_kdebug_level = 0; 157 #define db1_printf(a) if (rf_kdebug_level > 0) printf a 158 #else /* DEBUG */ 159 #define db1_printf(a) { } 160 #endif /* DEBUG */ 161 162 static RF_Raid_t **raidPtrs; /* global raid device descriptors */ 163 164 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 165 static rf_declare_mutex2(rf_sparet_wait_mutex); 166 static rf_declare_cond2(rf_sparet_wait_cv); 167 static rf_declare_cond2(rf_sparet_resp_cv); 168 169 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a 170 * spare table */ 171 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from 172 * installation process */ 173 #endif 174 175 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures"); 176 177 /* prototypes */ 178 static void KernelWakeupFunc(struct buf *); 179 static void InitBP(struct buf *, struct vnode *, unsigned, 180 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *), 181 void *, int, struct proc *); 182 static void raidinit(RF_Raid_t *); 183 184 void raidattach(int); 185 static int raid_match(device_t, cfdata_t, void *); 186 static void raid_attach(device_t, device_t, void *); 187 static int raid_detach(device_t, int); 188 189 static int raidread_component_area(dev_t, struct vnode *, void *, size_t, 190 daddr_t, daddr_t); 191 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t, 192 daddr_t, daddr_t, int); 193 194 static int 
raidwrite_component_label(unsigned, 195 dev_t, struct vnode *, RF_ComponentLabel_t *); 196 static int raidread_component_label(unsigned, 197 dev_t, struct vnode *, RF_ComponentLabel_t *); 198 199 200 dev_type_open(raidopen); 201 dev_type_close(raidclose); 202 dev_type_read(raidread); 203 dev_type_write(raidwrite); 204 dev_type_ioctl(raidioctl); 205 dev_type_strategy(raidstrategy); 206 dev_type_dump(raiddump); 207 dev_type_size(raidsize); 208 209 const struct bdevsw raid_bdevsw = { 210 raidopen, raidclose, raidstrategy, raidioctl, 211 raiddump, raidsize, D_DISK 212 }; 213 214 const struct cdevsw raid_cdevsw = { 215 raidopen, raidclose, raidread, raidwrite, raidioctl, 216 nostop, notty, nopoll, nommap, nokqfilter, D_DISK 217 }; 218 219 static struct dkdriver rf_dkdriver = { raidstrategy, minphys }; 220 221 /* XXX Not sure if the following should be replacing the raidPtrs above, 222 or if it should be used in conjunction with that... 223 */ 224 225 struct raid_softc { 226 device_t sc_dev; 227 int sc_flags; /* flags */ 228 int sc_cflags; /* configuration flags */ 229 uint64_t sc_size; /* size of the raid device */ 230 char sc_xname[20]; /* XXX external name */ 231 struct disk sc_dkdev; /* generic disk device info */ 232 struct bufq_state *buf_queue; /* used for the device queue */ 233 }; 234 /* sc_flags */ 235 #define RAIDF_INITED 0x01 /* unit has been initialized */ 236 #define RAIDF_WLABEL 0x02 /* label area is writable */ 237 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */ 238 #define RAIDF_SHUTDOWN 0x08 /* unit is being shutdown */ 239 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */ 240 #define RAIDF_LOCKED 0x80 /* unit is locked */ 241 242 #define raidunit(x) DISKUNIT(x) 243 int numraid = 0; 244 245 extern struct cfdriver raid_cd; 246 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc), 247 raid_match, raid_attach, raid_detach, NULL, NULL, NULL, 248 DVF_DETACH_SHUTDOWN); 249 250 /* 251 * Allow RAIDOUTSTANDING number of 
simultaneous IO's to this RAID device. 252 * Be aware that large numbers can allow the driver to consume a lot of 253 * kernel memory, especially on writes, and in degraded mode reads. 254 * 255 * For example: with a stripe width of 64 blocks (32k) and 5 disks, 256 * a single 64K write will typically require 64K for the old data, 257 * 64K for the old parity, and 64K for the new parity, for a total 258 * of 192K (if the parity buffer is not re-used immediately). 259 * Even it if is used immediately, that's still 128K, which when multiplied 260 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data. 261 * 262 * Now in degraded mode, for example, a 64K read on the above setup may 263 * require data reconstruction, which will require *all* of the 4 remaining 264 * disks to participate -- 4 * 32K/disk == 128K again. 265 */ 266 267 #ifndef RAIDOUTSTANDING 268 #define RAIDOUTSTANDING 6 269 #endif 270 271 #define RAIDLABELDEV(dev) \ 272 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART)) 273 274 /* declared here, and made public, for the benefit of KVM stuff.. 
*/ 275 struct raid_softc *raid_softc; 276 277 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *, 278 struct disklabel *); 279 static void raidgetdisklabel(dev_t); 280 static void raidmakedisklabel(struct raid_softc *); 281 282 static int raidlock(struct raid_softc *); 283 static void raidunlock(struct raid_softc *); 284 285 static int raid_detach_unlocked(struct raid_softc *); 286 287 static void rf_markalldirty(RF_Raid_t *); 288 static void rf_set_properties(struct raid_softc *, RF_Raid_t *); 289 290 void rf_ReconThread(struct rf_recon_req *); 291 void rf_RewriteParityThread(RF_Raid_t *raidPtr); 292 void rf_CopybackThread(RF_Raid_t *raidPtr); 293 void rf_ReconstructInPlaceThread(struct rf_recon_req *); 294 int rf_autoconfig(device_t); 295 void rf_buildroothack(RF_ConfigSet_t *); 296 297 RF_AutoConfig_t *rf_find_raid_components(void); 298 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *); 299 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *); 300 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t); 301 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *); 302 int rf_set_autoconfig(RF_Raid_t *, int); 303 int rf_set_rootpartition(RF_Raid_t *, int); 304 void rf_release_all_vps(RF_ConfigSet_t *); 305 void rf_cleanup_config_set(RF_ConfigSet_t *); 306 int rf_have_enough_components(RF_ConfigSet_t *); 307 int rf_auto_config_set(RF_ConfigSet_t *, int *); 308 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t); 309 310 /* 311 * Debugging, mostly. Set to 0 to not allow autoconfig to take place. 312 * Note that this is overridden by having RAID_AUTOCONFIG as an option 313 * in the kernel config file. 
 */
#ifdef RAID_AUTOCONFIG
int raidautoconfig = 1;
#else
int raidautoconfig = 0;
#endif
/* Set once rf_autoconfig() has run; prevents a second autoconfig pass. */
static bool raidautoconfigdone = false;

struct RF_Pools_s rf_pools;

/*
 * raidattach: pseudo-device attach routine.  Allocates the global
 * raidPtrs[] and raid_softc[] arrays for 'num' units, boots the
 * RAIDframe core, attaches the cfattach, and registers a config
 * finalizer so autoconfiguration runs after real hardware is found.
 */
void
raidattach(int num)
{
	int raidID;
	int     i, rc;

	aprint_debug("raidattach: Asked for %d units\n", num);

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	/* M_NOWAIT: attach-time allocation; on failure the driver bails
	 * out with a warning rather than sleeping. */
	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
		   M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		aprint_error("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			/* NOTE(review): shrinks numraid to the units that
			 * did get an RF_Raid_t and returns early; earlier
			 * allocations are intentionally kept. */
			aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID);
			numraid = raidID;
			return;
		}
	}

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}

/*
 * rf_autoconfig: config finalizer.  Scans disks for RAID component
 * labels, groups them into sets, and hands the sets to
 * rf_buildroothack() for configuration.  Runs at most once
 * (guarded by raidautoconfigdone); returns 1 when a scan was
 * performed, 0 when autoconfig is disabled or already done.
 */
int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}

/*
 * rf_buildroothack: configure each eligible autoconfig set, then try
 * to determine booted_device when one (or more) of the configured
 * sets is marked rootable.  Frees all config-set resources as it goes.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int retcode;
	int raidID;
	int rootID;
	int col;
	int num_root;
	char *devname;

	rootID = 0;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure==1) {
			retcode = rf_auto_config_set(cset,&raidID);
			if (!retcode) {
				aprint_debug("raid%d: configured ok\n", raidID);
				if (cset->rootable) {
					rootID = raidID;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	if (num_root == 1) {
		if (raid_softc[rootID].sc_dkdev.dk_nwedges != 0) {
			/* XXX: How do we find the real root partition? */
			char cname[sizeof(cset->ac->devname)];
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(raid_softc[rootID].sc_dev), 'a');
			booted_device = dkwedge_find_by_wname(cname);
		} else
			booted_device = raid_softc[rootID].sc_dev;
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help.
 If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */

		if (booted_device == NULL)
			cpu_rootconf();
		if (booted_device == NULL)
			return;

		/* Re-count: credit a set as root only if one of its
		 * components lives on the device we actually booted from. */
		num_root = 0;
		for (raidID = 0; raidID < numraid; raidID++) {
			if (raidPtrs[raidID]->valid == 0)
				continue;

			if (raidPtrs[raidID]->root_partition == 0)
				continue;

			for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
				devname = raidPtrs[raidID]->Disks[col].devname;
				devname += sizeof("/dev/") - 1;
				if (strncmp(devname, device_xname(booted_device),
					    strlen(device_xname(booted_device))) != 0)
					continue;
				aprint_debug("raid%d includes boot device %s\n",
				       raidID, devname);
				num_root++;
				rootID = raidID;
			}
		}

		if (num_root == 1) {
			booted_device = raid_softc[rootID].sc_dev;
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}


/*
 * raidsize: return the size (in DEV_BSIZE units) of the given swap
 * partition, or -1 if the unit is invalid, not initialized, or the
 * partition is not FS_SWAP.  Temporarily opens the device if it was
 * not already open.
 */
int
raidsize(dev_t dev)
{
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, unit, omask, size;

	unit = raidunit(dev);
	if (unit >= numraid)
		return (-1);
	rs = &raid_softc[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = rs->sc_dkdev.dk_openmask & (1 << part);
	lp = rs->sc_dkdev.dk_label;

	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
		return (-1);

	return (size);

}

/*
 * raiddump: crash-dump entry point.  Only RAID 1 sets are supported
 * (one data + one parity column); picks a live component (preferring
 * the master, then a spare of the master, then the slave, then a
 * spare of the slave) and forwards the dump to its block device.
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int     part, c, sparecol, j, scol, dumpto;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);

	rs = &raid_softc[unit];
	raidPtr = raidPtrs[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* NOTE(review): blkno is a daddr_t printed with PRIu64 --
	 * presumably always non-negative here; verify against callers. */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	 */

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
/* ARGSUSED */
/*
 * raidopen: open entry point.  Takes the unit lock, refuses units
 * being shut down, reads the disklabel on first open, validates the
 * partition, records the open in the appropriate openmask, and marks
 * components dirty on the first open of an initialized set.
 */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(raidPtrs[unit]);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
/*
 * raidclose: close entry point.  Clears this partition's bit from the
 * openmasks; on last close of an initialized set, writes final
 * (clean) component labels.
 */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	int     error = 0;
	int     part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration...
*/ 812 switch (fmt) { 813 case S_IFCHR: 814 rs->sc_dkdev.dk_copenmask &= ~(1 << part); 815 break; 816 817 case S_IFBLK: 818 rs->sc_dkdev.dk_bopenmask &= ~(1 << part); 819 break; 820 } 821 rs->sc_dkdev.dk_openmask = 822 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask; 823 824 if ((rs->sc_dkdev.dk_openmask == 0) && 825 ((rs->sc_flags & RAIDF_INITED) != 0)) { 826 /* Last one... device is not unconfigured yet. 827 Device shutdown has taken care of setting the 828 clean bits if RAIDF_INITED is not set 829 mark things as clean... */ 830 831 rf_update_component_labels(raidPtrs[unit], 832 RF_FINAL_COMPONENT_UPDATE); 833 834 /* If the kernel is shutting down, it will detach 835 * this RAID set soon enough. 836 */ 837 } 838 839 raidunlock(rs); 840 return (0); 841 842 } 843 844 void 845 raidstrategy(struct buf *bp) 846 { 847 unsigned int raidID = raidunit(bp->b_dev); 848 RF_Raid_t *raidPtr; 849 struct raid_softc *rs = &raid_softc[raidID]; 850 int wlabel; 851 852 if ((rs->sc_flags & RAIDF_INITED) ==0) { 853 bp->b_error = ENXIO; 854 goto done; 855 } 856 if (raidID >= numraid || !raidPtrs[raidID]) { 857 bp->b_error = ENODEV; 858 goto done; 859 } 860 raidPtr = raidPtrs[raidID]; 861 if (!raidPtr->valid) { 862 bp->b_error = ENODEV; 863 goto done; 864 } 865 if (bp->b_bcount == 0) { 866 db1_printf(("b_bcount is zero..\n")); 867 goto done; 868 } 869 870 /* 871 * Do bounds checking and adjust transfer. If there's an 872 * error, the bounds check will flag that for us. 
873 */ 874 875 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING); 876 if (DISKPART(bp->b_dev) == RAW_PART) { 877 uint64_t size; /* device size in DEV_BSIZE unit */ 878 879 if (raidPtr->logBytesPerSector > DEV_BSHIFT) { 880 size = raidPtr->totalSectors << 881 (raidPtr->logBytesPerSector - DEV_BSHIFT); 882 } else { 883 size = raidPtr->totalSectors >> 884 (DEV_BSHIFT - raidPtr->logBytesPerSector); 885 } 886 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) { 887 goto done; 888 } 889 } else { 890 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) { 891 db1_printf(("Bounds check failed!!:%d %d\n", 892 (int) bp->b_blkno, (int) wlabel)); 893 goto done; 894 } 895 } 896 897 rf_lock_mutex2(raidPtr->iodone_lock); 898 899 bp->b_resid = 0; 900 901 /* stuff it onto our queue */ 902 bufq_put(rs->buf_queue, bp); 903 904 /* scheduled the IO to happen at the next convenient time */ 905 rf_signal_cond2(raidPtr->iodone_cv); 906 rf_unlock_mutex2(raidPtr->iodone_lock); 907 908 return; 909 910 done: 911 bp->b_resid = bp->b_bcount; 912 biodone(bp); 913 } 914 /* ARGSUSED */ 915 int 916 raidread(dev_t dev, struct uio *uio, int flags) 917 { 918 int unit = raidunit(dev); 919 struct raid_softc *rs; 920 921 if (unit >= numraid) 922 return (ENXIO); 923 rs = &raid_softc[unit]; 924 925 if ((rs->sc_flags & RAIDF_INITED) == 0) 926 return (ENXIO); 927 928 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio)); 929 930 } 931 /* ARGSUSED */ 932 int 933 raidwrite(dev_t dev, struct uio *uio, int flags) 934 { 935 int unit = raidunit(dev); 936 struct raid_softc *rs; 937 938 if (unit >= numraid) 939 return (ENXIO); 940 rs = &raid_softc[unit]; 941 942 if ((rs->sc_flags & RAIDF_INITED) == 0) 943 return (ENXIO); 944 945 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio)); 946 947 } 948 949 static int 950 raid_detach_unlocked(struct raid_softc *rs) 951 { 952 int error; 953 RF_Raid_t *raidPtr; 954 955 raidPtr = raidPtrs[device_unit(rs->sc_dev)]; 956 957 /* 958 * If 
somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	dkwedge_delall(&rs->sc_dkdev);
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	aprint_normal_dev(rs->sc_dev, "detached\n");

	return 0;
}

/*
 * raidioctl: ioctl entry point for the raid pseudo-device.
 *
 * Handles both RAIDframe-specific commands (RAIDFRAME_*) and the
 * standard disk ioctls (DIOC*).  Commands fall through the three
 * gating switches below (write permission, then "must be initialized"),
 * then the big dispatch switch; anything unhandled there falls through
 * to disk_ioctl() and the disklabel/wedge handling at the bottom.
 *
 * Returns 0 on success or an errno.  Runs in process context on behalf
 * of the caller (struct lwp *l).
 */
int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int     unit = raidunit(dev);
	int     error = 0;
	int     part, pmask, s;
	cfdata_t cf;
	struct raid_softc *rs;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, **ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
	/* int raidid; */
	struct rf_recon_req *rrcopy, *rr;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_ComponentLabel_t **clabel_ptr;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t component;
	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
	int i, j, d;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif
	struct dkwedge_info *dkw;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];
	raidPtr = raidPtrs[unit];

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
		(int) DISKPART(dev), (int) unit, cmd));

	/* Must be open for writes for these commands... */
	switch (cmd) {
#ifdef DIOCGSECTORSIZE
	case DIOCGSECTORSIZE:
		*(u_int *)data = raidPtr->bytesPerSector;
		return 0;
	case DIOCGMEDIASIZE:
		*(off_t *)data =
		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
		return 0;
#endif
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	case DIOCWLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCSSTRATEGY:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCWDINFO:
	case ODIOCSDINFO:
	case ODIOCGDEFLABEL:
#endif
	case DIOCGPART:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCLWEDGES:
	case DIOCCACHESYNC:
	case RAIDFRAME_SHUTDOWN:
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
	case DIOCGSTRATEGY:
	case DIOCSSTRATEGY:
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
#ifdef COMPAT_50
	case RAIDFRAME_GET_INFO50:
		return rf_get_info50(raidPtr, data);

	case RAIDFRAME_CONFIGURE50:
		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
			return retcode;
		goto config;
#endif
		/* configure the system */
	case RAIDFRAME_CONFIGURE:

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n",unit);
			return(EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

		u_cfg = *((RF_Config_t **) data);
		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
				retcode));
			return (retcode);
		}
		goto config;
	config:
		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				return (EINVAL);
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				return (ENOMEM);
			}
			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
				    k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
					retcode));
				return (retcode);
			}
		} else
			specific_buf = NULL;
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 *  there is no stale data left in the case of a
		 *  reconfiguration
		 */
		memset(raidPtr, 0, sizeof(*raidPtr));
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(raidPtr);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((error = raidlock(rs)) != 0)
			return (error);

		/* refuse if any other partition (or both block and char
		 * opens of this one) are still active */
		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
			(rs->sc_dkdev.dk_copenmask & pmask)))
			retcode = EBUSY;
		else {
			rs->sc_flags |= RAIDF_SHUTDOWN;
			rs->sc_dkdev.dk_copenmask &= ~pmask;
			rs->sc_dkdev.dk_bopenmask &= ~pmask;
			rs->sc_dkdev.dk_openmask &= ~pmask;
			retcode = 0;
		}

		raidunlock(rs);

		if (retcode != 0)
			return retcode;

		/* free the pseudo device attach bits */

		cf = device_cfdata(rs->sc_dev);
		if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
			free(cf, M_RAIDFRAME);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		clabel_ptr = (RF_ComponentLabel_t **) data;
		/* need to read the component label for the disk indicated
		   by row,column in clabel */

		/*
		 * Perhaps there should be an option to skip the in-core
		 * copy and hit the disk, as with disklabel(8).
		 */
		RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));

		/* copy in only to learn which column the caller wants */
		retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));

		if (retcode) {
			RF_Free(clabel, sizeof(*clabel));
			return retcode;
		}

		clabel->row = 0; /* Don't allow looking at anything else.*/

		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol +
		    raidPtr->numSpare)) {
			RF_Free(clabel, sizeof(*clabel));
			return EINVAL;
		}

		RF_Free(clabel, sizeof(*clabel));

		/* hand back the in-core label for that column */
		clabel = raidget_component_label(raidPtr, column);

		return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));

#if 0
	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		raidid = raidPtr->raidid;
#ifdef DEBUG
		printf("raid%d: Got component label:\n", raidid);
		printf("raid%d: Version: %d\n", raidid, clabel->version);
		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
		printf("raid%d: Column: %d\n", raidid, clabel->column);
		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
		printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif
		clabel->row = 0;
		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
		memcpy(raidget_component_label(raidPtr, column),
		    clabel, sizeof(*clabel));
		raidflush_component_label(raidPtr, column);
		return (0);
#endif

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		   */

		raidPtr->serial_number = clabel->serial_number;

		for(column=0;column<raidPtr->numCol;column++) {
			diskPtr = &raidPtr->Disks[column];
			if (!RF_DEAD_DISK(diskPtr->status)) {
				ci_label = raidget_component_label(raidPtr,
				    column);
				/* Zeroing this is important. */
				memset(ci_label, 0, sizeof(*ci_label));
				raid_init_component_label(raidPtr, ci_label);
				ci_label->serial_number =
				    raidPtr->serial_number;
				ci_label->row = 0; /* we dont' pretend to support more */
				rf_component_label_set_partitionsize(ci_label,
				    diskPtr->partitionSize);
				ci_label->column = column;
				raidflush_component_label(raidPtr, column);
			}
			/* XXXjld what about the spares? */
		}

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return(0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return(EINVAL);
		}

		/* rewrite runs asynchronously in its own kthread */
		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
					   rf_RewriteParityThread,
					   raidPtr,"raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		/* NOTE(review): unimplemented -- returns retcode (still 0)
		 * without doing anything. */
		return(retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return(EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		component.row = 0; /* we don't support any more */
		column = component.column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		rf_lock_mutex2(raidPtr->mutex);
		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant!!! */
			/* some component other than this has failed.
			   Let's not make things worse than they already
			   are... */
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:     Col: %d   Too many failures.\n",
			       raidPtr->raidid, column);
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status ==
		    rf_ds_reconstructing) {
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);

			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status == rf_ds_spared) {
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* the recon thread owns and frees rrcopy */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);

		rrcopy->raidPtr = (void *) raidPtr;
		rrcopy->col = column;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconstructInPlaceThread,
					   rrcopy,"raid_reconip");
		return(retcode);

	case RAIDFRAME_GET_INFO:
		if (!raidPtr->valid)
			return (ENODEV);
		ucfgp = (RF_DeviceConfig_t **) data;
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
			  (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		d_cfg->rows = 1; /* there is only 1 row now */
		d_cfg->cols = raidPtr->numCol;
		d_cfg->ndevs = raidPtr->numCol;
		if (d_cfg->ndevs >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->nspares = raidPtr->numSpare;
		if (d_cfg->nspares >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
		d = 0;
		for (j = 0; j < d_cfg->cols; j++) {
			d_cfg->devs[d] = raidPtr->Disks[j];
			d++;
		}
		/* spares live in Disks[] just past the data columns */
		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
			d_cfg->spares[i] = raidPtr->Disks[j];
		}
		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map,
		    (struct rf_pmstat *)data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
			(struct rf_pmparams *)data, 1))
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return (0);

		/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		rr = (struct rf_recon_req *) data;
		rr->row = 0;
		if (rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);


		rf_lock_mutex2(raidPtr->mutex);
		if (raidPtr->status == rf_rs_reconstructing) {
			/* you can't fail a disk while we're reconstructing! */
			/* XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if ((raidPtr->Disks[rr->col].status ==
		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
			/* some other component has failed.  Let's not make
			   things worse. XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
			/* Can't fail a spared disk! */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);
		memcpy(rrcopy, rr, sizeof(*rr));
		rrcopy->raidPtr = (void *) raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconThread,
					   rrcopy,"raid_recon");
		/* NOTE(review): retcode from RF_CREATE_THREAD is discarded
		 * here; thread-creation failure is not reported to the
		 * caller -- confirm whether that is intentional. */
		return (0);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
					   rf_CopybackThread,
					   raidPtr,"raid_copyback");
		return (retcode);

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->status != rf_rs_reconstructing)
			*(int *) data = 100;
		else {
			if (raidPtr->reconControl->numRUsTotal > 0) {
				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
			} else {
				*(int *) data = 0;
			}
		}
		return (0);
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->status != rf_rs_reconstructing) {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		} else {
			progressInfo.total =
				raidPtr->reconControl->numRUsTotal;
			progressInfo.completed =
				raidPtr->reconControl->numRUsComplete;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
				raidPtr->parity_rewrite_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->parity_rewrite_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->parity_rewrite_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->copyback_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->copyback_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table.  this ioctl does not return until a
		 * spare table is needed.  XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table     -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the dameon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		/* NOTE(review): "rf_broadcast_conf2" looks like a typo for
		 * rf_broadcast_cond2 (cf. the _resp_ path below).  This is
		 * dead code under #if 0, so it never compiles. */
		rf_broadcast_conf2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_resp_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return (error);

	switch (cmd) {
	case DIOCGDINFO:
		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
		break;
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
		newlabel = *(rs->sc_dkdev.dk_label);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGPART:
		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
		((struct partinfo *) data)->part =
		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;
#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		if ((error = raidlock(rs)) != 0)
			return (error);

		rs->sc_flags |= RAIDF_LABELLING;

		error = setdisklabel(rs->sc_dkdev.dk_label,
		    lp, 0, rs->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			/* only DIOCWDINFO (and its old-label variant)
			 * actually writes the label to disk */
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(RAIDLABELDEV(dev),
				    raidstrategy, rs->sc_dkdev.dk_label,
				    rs->sc_dkdev.dk_cpulabel);
		}
		rs->sc_flags &= ~RAIDF_LABELLING;

		raidunlock(rs);

		if (error)
			return (error);
		break;
	}

	case DIOCWLABEL:
		if (*(int *) data != 0)
			rs->sc_flags |= RAIDF_WLABEL;
		else
			rs->sc_flags &= ~RAIDF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCAWEDGE:
	case DIOCDWEDGE:
		dkw = (void *)data;

		/* If the ioctl happens here, the parent is us. */
		(void)strcpy(dkw->dkw_parent, rs->sc_xname);
		return cmd == DIOCAWEDGE ?
		    dkwedge_add(dkw) : dkwedge_del(dkw);

	case DIOCLWEDGES:
		return dkwedge_list(&rs->sc_dkdev,
		    (struct dkwedge_list *)data, l);
	case DIOCCACHESYNC:
		return rf_sync_component_caches(raidPtr);

	case DIOCGSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;

		s = splbio();
		strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
		    sizeof(dks->dks_name));
		splx(s);
		dks->dks_paramlen = 0;

		return 0;
	    }

	case DIOCSSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;
		struct bufq_state *new;
		struct bufq_state *old;

		if (dks->dks_param != NULL) {
			return EINVAL;
		}
		dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
		error = bufq_alloc(&new, dks->dks_name,
		    BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
		if (error) {
			return error;
		}
		/* swap in the new queue at splbio so raidstrategy can't
		 * race; pending bufs are migrated to the new queue */
		s = splbio();
		old = rs->buf_queue;
		bufq_move(new, old);
		rs->buf_queue = new;
		splx(s);
		bufq_free(old);

		return 0;
	    }

	default:
		retcode = ENOTTY;
	}
	return (retcode);

}


/* raidinit -- complete the rest of the initialization for the
   RAIDframe device.  */


static void
raidinit(RF_Raid_t *raidPtr)
{
	cfdata_t cf;
	struct raid_softc *rs;
	int     unit;

	unit = raidPtr->raidid;

	rs = &raid_softc[unit];

	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	/* XXX doesn't check bounds.
	 */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	rs->sc_dev = config_attach_pseudo(cf);

	if (rs->sc_dev == NULL) {
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		/* roll back: we are not initialized after all */
		rs->sc_flags &= ~RAIDF_INITED;
		free(cf, M_RAIDFRAME);
		return;
	}
	/* on success, cf is owned by the device and freed at detach time
	 * (see the RAIDFRAME_SHUTDOWN path in raidioctl) */

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
	disk_attach(&rs->sc_dkdev);
	disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* scan the new device for wedges (GPT partitions etc.) */
	dkwedge_discover(&rs->sc_dkdev);

	rf_set_properties(rs, raidPtr);

}
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device were requesting a spare table
 * XXX
 *
 * XXX This code is not currently used.
 GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int     retcode;

	/* post the request on the wait queue and wake the daemon */
	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	/* pop the daemon's response; its fcol field carries the status */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
#endif

/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int     unit;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;
	int rc;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
		    RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* loop invariant: raidPtr->mutex is held at the top of each
	 * iteration and released while we work on a buf */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			(int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a trailing partial sector, if any */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				(int) raid_addr, (int) sum, (int) num_blocks,
				(int) pb, (int) bp->b_resid));
		}
		/* reject requests past the end of the set; the extra
		 * comparisons catch wrap-around in the sum */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}




/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */

int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int     op = (req->type == RF_IO_TYPE_READ) ?
	    B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* complete the NOP immediately via the normal callback */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* set up bp to target the component device; KernelWakeupFunc
		 * runs as the biodone callback with req in bp->b_private */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* req was stashed in b_private by rf_DispatchKernelIO/InitBP */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}


/*
 * initialize a buf structure for doing an I/O in the kernel.
 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
       RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
       void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
       struct proc *b_proc)
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	/* convert sector offset to DEV_BSIZE units for b_blkno */
	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;	/* invoked by biodone() on completion */
	bp->b_private = cbArg;
}

static void
raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
		    struct disklabel *lp)
{
	memset(lp, 0, sizeof(*lp));

	/* fabricate a label...
*/ 2317 lp->d_secperunit = raidPtr->totalSectors; 2318 lp->d_secsize = raidPtr->bytesPerSector; 2319 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe; 2320 lp->d_ntracks = 4 * raidPtr->numCol; 2321 lp->d_ncylinders = raidPtr->totalSectors / 2322 (lp->d_nsectors * lp->d_ntracks); 2323 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 2324 2325 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename)); 2326 lp->d_type = DTYPE_RAID; 2327 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 2328 lp->d_rpm = 3600; 2329 lp->d_interleave = 1; 2330 lp->d_flags = 0; 2331 2332 lp->d_partitions[RAW_PART].p_offset = 0; 2333 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors; 2334 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 2335 lp->d_npartitions = RAW_PART + 1; 2336 2337 lp->d_magic = DISKMAGIC; 2338 lp->d_magic2 = DISKMAGIC; 2339 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label); 2340 2341 } 2342 /* 2343 * Read the disklabel from the raid device. If one is not present, fake one 2344 * up. 2345 */ 2346 static void 2347 raidgetdisklabel(dev_t dev) 2348 { 2349 int unit = raidunit(dev); 2350 struct raid_softc *rs = &raid_softc[unit]; 2351 const char *errstring; 2352 struct disklabel *lp = rs->sc_dkdev.dk_label; 2353 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel; 2354 RF_Raid_t *raidPtr; 2355 2356 db1_printf(("Getting the disklabel...\n")); 2357 2358 memset(clp, 0, sizeof(*clp)); 2359 2360 raidPtr = raidPtrs[unit]; 2361 2362 raidgetdefaultlabel(raidPtr, rs, lp); 2363 2364 /* 2365 * Call the generic disklabel extraction routine. 2366 */ 2367 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy, 2368 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel); 2369 if (errstring) 2370 raidmakedisklabel(rs); 2371 else { 2372 int i; 2373 struct partition *pp; 2374 2375 /* 2376 * Sanity check whether the found disklabel is valid. 
2377 * 2378 * This is necessary since total size of the raid device 2379 * may vary when an interleave is changed even though exactly 2380 * same components are used, and old disklabel may used 2381 * if that is found. 2382 */ 2383 if (lp->d_secperunit != rs->sc_size) 2384 printf("raid%d: WARNING: %s: " 2385 "total sector size in disklabel (%" PRIu32 ") != " 2386 "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname, 2387 lp->d_secperunit, rs->sc_size); 2388 for (i = 0; i < lp->d_npartitions; i++) { 2389 pp = &lp->d_partitions[i]; 2390 if (pp->p_offset + pp->p_size > rs->sc_size) 2391 printf("raid%d: WARNING: %s: end of partition `%c' " 2392 "exceeds the size of raid (%" PRIu64 ")\n", 2393 unit, rs->sc_xname, 'a' + i, rs->sc_size); 2394 } 2395 } 2396 2397 } 2398 /* 2399 * Take care of things one might want to take care of in the event 2400 * that a disklabel isn't present. 2401 */ 2402 static void 2403 raidmakedisklabel(struct raid_softc *rs) 2404 { 2405 struct disklabel *lp = rs->sc_dkdev.dk_label; 2406 db1_printf(("Making a label..\n")); 2407 2408 /* 2409 * For historical reasons, if there's no disklabel present 2410 * the raw partition must be marked FS_BSDFFS. 2411 */ 2412 2413 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 2414 2415 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 2416 2417 lp->d_checksum = dkcksum(lp); 2418 } 2419 /* 2420 * Wait interruptibly for an exclusive lock. 2421 * 2422 * XXX 2423 * Several drivers do this; it should be abstracted and made MP-safe. 2424 * (Hmm... where have we seen this warning before :-> GO ) 2425 */ 2426 static int 2427 raidlock(struct raid_softc *rs) 2428 { 2429 int error; 2430 2431 while ((rs->sc_flags & RAIDF_LOCKED) != 0) { 2432 rs->sc_flags |= RAIDF_WANTED; 2433 if ((error = 2434 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0) 2435 return (error); 2436 } 2437 rs->sc_flags |= RAIDF_LOCKED; 2438 return (0); 2439 } 2440 /* 2441 * Unlock and wake up any waiters. 
2442 */ 2443 static void 2444 raidunlock(struct raid_softc *rs) 2445 { 2446 2447 rs->sc_flags &= ~RAIDF_LOCKED; 2448 if ((rs->sc_flags & RAIDF_WANTED) != 0) { 2449 rs->sc_flags &= ~RAIDF_WANTED; 2450 wakeup(rs); 2451 } 2452 } 2453 2454 2455 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ 2456 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ 2457 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE 2458 2459 static daddr_t 2460 rf_component_info_offset(void) 2461 { 2462 2463 return RF_COMPONENT_INFO_OFFSET; 2464 } 2465 2466 static daddr_t 2467 rf_component_info_size(unsigned secsize) 2468 { 2469 daddr_t info_size; 2470 2471 KASSERT(secsize); 2472 if (secsize > RF_COMPONENT_INFO_SIZE) 2473 info_size = secsize; 2474 else 2475 info_size = RF_COMPONENT_INFO_SIZE; 2476 2477 return info_size; 2478 } 2479 2480 static daddr_t 2481 rf_parity_map_offset(RF_Raid_t *raidPtr) 2482 { 2483 daddr_t map_offset; 2484 2485 KASSERT(raidPtr->bytesPerSector); 2486 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE) 2487 map_offset = raidPtr->bytesPerSector; 2488 else 2489 map_offset = RF_COMPONENT_INFO_SIZE; 2490 map_offset += rf_component_info_offset(); 2491 2492 return map_offset; 2493 } 2494 2495 static daddr_t 2496 rf_parity_map_size(RF_Raid_t *raidPtr) 2497 { 2498 daddr_t map_size; 2499 2500 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE) 2501 map_size = raidPtr->bytesPerSector; 2502 else 2503 map_size = RF_PARITY_MAP_SIZE; 2504 2505 return map_size; 2506 } 2507 2508 int 2509 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col) 2510 { 2511 RF_ComponentLabel_t *clabel; 2512 2513 clabel = raidget_component_label(raidPtr, col); 2514 clabel->clean = RF_RAID_CLEAN; 2515 raidflush_component_label(raidPtr, col); 2516 return(0); 2517 } 2518 2519 2520 int 2521 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col) 2522 { 2523 RF_ComponentLabel_t *clabel; 2524 2525 clabel = raidget_component_label(raidPtr, col); 2526 clabel->clean = RF_RAID_DIRTY; 2527 raidflush_component_label(raidPtr, col); 2528 
return(0); 2529 } 2530 2531 int 2532 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2533 { 2534 KASSERT(raidPtr->bytesPerSector); 2535 return raidread_component_label(raidPtr->bytesPerSector, 2536 raidPtr->Disks[col].dev, 2537 raidPtr->raid_cinfo[col].ci_vp, 2538 &raidPtr->raid_cinfo[col].ci_label); 2539 } 2540 2541 RF_ComponentLabel_t * 2542 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2543 { 2544 return &raidPtr->raid_cinfo[col].ci_label; 2545 } 2546 2547 int 2548 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2549 { 2550 RF_ComponentLabel_t *label; 2551 2552 label = &raidPtr->raid_cinfo[col].ci_label; 2553 label->mod_counter = raidPtr->mod_counter; 2554 #ifndef RF_NO_PARITY_MAP 2555 label->parity_map_modcount = label->mod_counter; 2556 #endif 2557 return raidwrite_component_label(raidPtr->bytesPerSector, 2558 raidPtr->Disks[col].dev, 2559 raidPtr->raid_cinfo[col].ci_vp, label); 2560 } 2561 2562 2563 static int 2564 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp, 2565 RF_ComponentLabel_t *clabel) 2566 { 2567 return raidread_component_area(dev, b_vp, clabel, 2568 sizeof(RF_ComponentLabel_t), 2569 rf_component_info_offset(), 2570 rf_component_info_size(secsize)); 2571 } 2572 2573 /* ARGSUSED */ 2574 static int 2575 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data, 2576 size_t msize, daddr_t offset, daddr_t dsize) 2577 { 2578 struct buf *bp; 2579 const struct bdevsw *bdev; 2580 int error; 2581 2582 /* XXX should probably ensure that we don't try to do this if 2583 someone has changed rf_protected_sectors. */ 2584 2585 if (b_vp == NULL) { 2586 /* For whatever reason, this component is not valid. 2587 Don't try to read a component label from it. */ 2588 return(EINVAL); 2589 } 2590 2591 /* get a block of the appropriate size... 
*/ 2592 bp = geteblk((int)dsize); 2593 bp->b_dev = dev; 2594 2595 /* get our ducks in a row for the read */ 2596 bp->b_blkno = offset / DEV_BSIZE; 2597 bp->b_bcount = dsize; 2598 bp->b_flags |= B_READ; 2599 bp->b_resid = dsize; 2600 2601 bdev = bdevsw_lookup(bp->b_dev); 2602 if (bdev == NULL) 2603 return (ENXIO); 2604 (*bdev->d_strategy)(bp); 2605 2606 error = biowait(bp); 2607 2608 if (!error) { 2609 memcpy(data, bp->b_data, msize); 2610 } 2611 2612 brelse(bp, 0); 2613 return(error); 2614 } 2615 2616 2617 static int 2618 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp, 2619 RF_ComponentLabel_t *clabel) 2620 { 2621 return raidwrite_component_area(dev, b_vp, clabel, 2622 sizeof(RF_ComponentLabel_t), 2623 rf_component_info_offset(), 2624 rf_component_info_size(secsize), 0); 2625 } 2626 2627 /* ARGSUSED */ 2628 static int 2629 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data, 2630 size_t msize, daddr_t offset, daddr_t dsize, int asyncp) 2631 { 2632 struct buf *bp; 2633 const struct bdevsw *bdev; 2634 int error; 2635 2636 /* get a block of the appropriate size... */ 2637 bp = geteblk((int)dsize); 2638 bp->b_dev = dev; 2639 2640 /* get our ducks in a row for the write */ 2641 bp->b_blkno = offset / DEV_BSIZE; 2642 bp->b_bcount = dsize; 2643 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0); 2644 bp->b_resid = dsize; 2645 2646 memset(bp->b_data, 0, dsize); 2647 memcpy(bp->b_data, data, msize); 2648 2649 bdev = bdevsw_lookup(bp->b_dev); 2650 if (bdev == NULL) 2651 return (ENXIO); 2652 (*bdev->d_strategy)(bp); 2653 if (asyncp) 2654 return 0; 2655 error = biowait(bp); 2656 brelse(bp, 0); 2657 if (error) { 2658 #if 1 2659 printf("Failed to write RAID component info!\n"); 2660 #endif 2661 } 2662 2663 return(error); 2664 } 2665 2666 void 2667 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map) 2668 { 2669 int c; 2670 2671 for (c = 0; c < raidPtr->numCol; c++) { 2672 /* Skip dead disks. 
*/ 2673 if (RF_DEAD_DISK(raidPtr->Disks[c].status)) 2674 continue; 2675 /* XXXjld: what if an error occurs here? */ 2676 raidwrite_component_area(raidPtr->Disks[c].dev, 2677 raidPtr->raid_cinfo[c].ci_vp, map, 2678 RF_PARITYMAP_NBYTE, 2679 rf_parity_map_offset(raidPtr), 2680 rf_parity_map_size(raidPtr), 0); 2681 } 2682 } 2683 2684 void 2685 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map) 2686 { 2687 struct rf_paritymap_ondisk tmp; 2688 int c,first; 2689 2690 first=1; 2691 for (c = 0; c < raidPtr->numCol; c++) { 2692 /* Skip dead disks. */ 2693 if (RF_DEAD_DISK(raidPtr->Disks[c].status)) 2694 continue; 2695 raidread_component_area(raidPtr->Disks[c].dev, 2696 raidPtr->raid_cinfo[c].ci_vp, &tmp, 2697 RF_PARITYMAP_NBYTE, 2698 rf_parity_map_offset(raidPtr), 2699 rf_parity_map_size(raidPtr)); 2700 if (first) { 2701 memcpy(map, &tmp, sizeof(*map)); 2702 first = 0; 2703 } else { 2704 rf_paritymap_merge(map, &tmp); 2705 } 2706 } 2707 } 2708 2709 void 2710 rf_markalldirty(RF_Raid_t *raidPtr) 2711 { 2712 RF_ComponentLabel_t *clabel; 2713 int sparecol; 2714 int c; 2715 int j; 2716 int scol = -1; 2717 2718 raidPtr->mod_counter++; 2719 for (c = 0; c < raidPtr->numCol; c++) { 2720 /* we don't want to touch (at all) a disk that has 2721 failed */ 2722 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) { 2723 clabel = raidget_component_label(raidPtr, c); 2724 if (clabel->status == rf_ds_spared) { 2725 /* XXX do something special... 2726 but whatever you do, don't 2727 try to access it!! */ 2728 } else { 2729 raidmarkdirty(raidPtr, c); 2730 } 2731 } 2732 } 2733 2734 for( c = 0; c < raidPtr->numSpare ; c++) { 2735 sparecol = raidPtr->numCol + c; 2736 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 2737 /* 2738 2739 we claim this disk is "optimal" if it's 2740 rf_ds_used_spare, as that means it should be 2741 directly substitutable for the disk it replaced. 2742 We note that too... 
2743 2744 */ 2745 2746 for(j=0;j<raidPtr->numCol;j++) { 2747 if (raidPtr->Disks[j].spareCol == sparecol) { 2748 scol = j; 2749 break; 2750 } 2751 } 2752 2753 clabel = raidget_component_label(raidPtr, sparecol); 2754 /* make sure status is noted */ 2755 2756 raid_init_component_label(raidPtr, clabel); 2757 2758 clabel->row = 0; 2759 clabel->column = scol; 2760 /* Note: we *don't* change status from rf_ds_used_spare 2761 to rf_ds_optimal */ 2762 /* clabel.status = rf_ds_optimal; */ 2763 2764 raidmarkdirty(raidPtr, sparecol); 2765 } 2766 } 2767 } 2768 2769 2770 void 2771 rf_update_component_labels(RF_Raid_t *raidPtr, int final) 2772 { 2773 RF_ComponentLabel_t *clabel; 2774 int sparecol; 2775 int c; 2776 int j; 2777 int scol; 2778 2779 scol = -1; 2780 2781 /* XXX should do extra checks to make sure things really are clean, 2782 rather than blindly setting the clean bit... */ 2783 2784 raidPtr->mod_counter++; 2785 2786 for (c = 0; c < raidPtr->numCol; c++) { 2787 if (raidPtr->Disks[c].status == rf_ds_optimal) { 2788 clabel = raidget_component_label(raidPtr, c); 2789 /* make sure status is noted */ 2790 clabel->status = rf_ds_optimal; 2791 2792 /* note what unit we are configured as */ 2793 clabel->last_unit = raidPtr->raidid; 2794 2795 raidflush_component_label(raidPtr, c); 2796 if (final == RF_FINAL_COMPONENT_UPDATE) { 2797 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2798 raidmarkclean(raidPtr, c); 2799 } 2800 } 2801 } 2802 /* else we don't touch it.. */ 2803 } 2804 2805 for( c = 0; c < raidPtr->numSpare ; c++) { 2806 sparecol = raidPtr->numCol + c; 2807 /* Need to ensure that the reconstruct actually completed! */ 2808 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 2809 /* 2810 2811 we claim this disk is "optimal" if it's 2812 rf_ds_used_spare, as that means it should be 2813 directly substitutable for the disk it replaced. 2814 We note that too... 
2815 2816 */ 2817 2818 for(j=0;j<raidPtr->numCol;j++) { 2819 if (raidPtr->Disks[j].spareCol == sparecol) { 2820 scol = j; 2821 break; 2822 } 2823 } 2824 2825 /* XXX shouldn't *really* need this... */ 2826 clabel = raidget_component_label(raidPtr, sparecol); 2827 /* make sure status is noted */ 2828 2829 raid_init_component_label(raidPtr, clabel); 2830 2831 clabel->column = scol; 2832 clabel->status = rf_ds_optimal; 2833 clabel->last_unit = raidPtr->raidid; 2834 2835 raidflush_component_label(raidPtr, sparecol); 2836 if (final == RF_FINAL_COMPONENT_UPDATE) { 2837 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2838 raidmarkclean(raidPtr, sparecol); 2839 } 2840 } 2841 } 2842 } 2843 } 2844 2845 void 2846 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured) 2847 { 2848 2849 if (vp != NULL) { 2850 if (auto_configured == 1) { 2851 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2852 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 2853 vput(vp); 2854 2855 } else { 2856 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred); 2857 } 2858 } 2859 } 2860 2861 2862 void 2863 rf_UnconfigureVnodes(RF_Raid_t *raidPtr) 2864 { 2865 int r,c; 2866 struct vnode *vp; 2867 int acd; 2868 2869 2870 /* We take this opportunity to close the vnodes like we should.. 
*/ 2871 2872 for (c = 0; c < raidPtr->numCol; c++) { 2873 vp = raidPtr->raid_cinfo[c].ci_vp; 2874 acd = raidPtr->Disks[c].auto_configured; 2875 rf_close_component(raidPtr, vp, acd); 2876 raidPtr->raid_cinfo[c].ci_vp = NULL; 2877 raidPtr->Disks[c].auto_configured = 0; 2878 } 2879 2880 for (r = 0; r < raidPtr->numSpare; r++) { 2881 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp; 2882 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured; 2883 rf_close_component(raidPtr, vp, acd); 2884 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL; 2885 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0; 2886 } 2887 } 2888 2889 2890 void 2891 rf_ReconThread(struct rf_recon_req *req) 2892 { 2893 int s; 2894 RF_Raid_t *raidPtr; 2895 2896 s = splbio(); 2897 raidPtr = (RF_Raid_t *) req->raidPtr; 2898 raidPtr->recon_in_progress = 1; 2899 2900 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col, 2901 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0)); 2902 2903 RF_Free(req, sizeof(*req)); 2904 2905 raidPtr->recon_in_progress = 0; 2906 splx(s); 2907 2908 /* That's all... */ 2909 kthread_exit(0); /* does not return */ 2910 } 2911 2912 void 2913 rf_RewriteParityThread(RF_Raid_t *raidPtr) 2914 { 2915 int retcode; 2916 int s; 2917 2918 raidPtr->parity_rewrite_stripes_done = 0; 2919 raidPtr->parity_rewrite_in_progress = 1; 2920 s = splbio(); 2921 retcode = rf_RewriteParity(raidPtr); 2922 splx(s); 2923 if (retcode) { 2924 printf("raid%d: Error re-writing parity (%d)!\n", 2925 raidPtr->raidid, retcode); 2926 } else { 2927 /* set the clean bit! If we shutdown correctly, 2928 the clean bit on each component label will get 2929 set */ 2930 raidPtr->parity_good = RF_RAID_CLEAN; 2931 } 2932 raidPtr->parity_rewrite_in_progress = 0; 2933 2934 /* Anyone waiting for us to stop? If so, inform them... */ 2935 if (raidPtr->waitShutdown) { 2936 wakeup(&raidPtr->parity_rewrite_in_progress); 2937 } 2938 2939 /* That's all... 
*/ 2940 kthread_exit(0); /* does not return */ 2941 } 2942 2943 2944 void 2945 rf_CopybackThread(RF_Raid_t *raidPtr) 2946 { 2947 int s; 2948 2949 raidPtr->copyback_in_progress = 1; 2950 s = splbio(); 2951 rf_CopybackReconstructedData(raidPtr); 2952 splx(s); 2953 raidPtr->copyback_in_progress = 0; 2954 2955 /* That's all... */ 2956 kthread_exit(0); /* does not return */ 2957 } 2958 2959 2960 void 2961 rf_ReconstructInPlaceThread(struct rf_recon_req *req) 2962 { 2963 int s; 2964 RF_Raid_t *raidPtr; 2965 2966 s = splbio(); 2967 raidPtr = req->raidPtr; 2968 raidPtr->recon_in_progress = 1; 2969 rf_ReconstructInPlace(raidPtr, req->col); 2970 RF_Free(req, sizeof(*req)); 2971 raidPtr->recon_in_progress = 0; 2972 splx(s); 2973 2974 /* That's all... */ 2975 kthread_exit(0); /* does not return */ 2976 } 2977 2978 static RF_AutoConfig_t * 2979 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp, 2980 const char *cname, RF_SectorCount_t size, uint64_t numsecs, 2981 unsigned secsize) 2982 { 2983 int good_one = 0; 2984 RF_ComponentLabel_t *clabel; 2985 RF_AutoConfig_t *ac; 2986 2987 clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT); 2988 if (clabel == NULL) { 2989 oomem: 2990 while(ac_list) { 2991 ac = ac_list; 2992 if (ac->clabel) 2993 free(ac->clabel, M_RAIDFRAME); 2994 ac_list = ac_list->next; 2995 free(ac, M_RAIDFRAME); 2996 } 2997 printf("RAID auto config: out of memory!\n"); 2998 return NULL; /* XXX probably should panic? */ 2999 } 3000 3001 if (!raidread_component_label(secsize, dev, vp, clabel)) { 3002 /* Got the label. Does it look reasonable? */ 3003 if (rf_reasonable_label(clabel, numsecs) && 3004 (rf_component_label_partitionsize(clabel) <= size)) { 3005 #ifdef DEBUG 3006 printf("Component on: %s: %llu\n", 3007 cname, (unsigned long long)size); 3008 rf_print_component_label(clabel); 3009 #endif 3010 /* if it's reasonable, add it, else ignore it. 
*/ 3011 ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME, 3012 M_NOWAIT); 3013 if (ac == NULL) { 3014 free(clabel, M_RAIDFRAME); 3015 goto oomem; 3016 } 3017 strlcpy(ac->devname, cname, sizeof(ac->devname)); 3018 ac->dev = dev; 3019 ac->vp = vp; 3020 ac->clabel = clabel; 3021 ac->next = ac_list; 3022 ac_list = ac; 3023 good_one = 1; 3024 } 3025 } 3026 if (!good_one) { 3027 /* cleanup */ 3028 free(clabel, M_RAIDFRAME); 3029 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3030 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3031 vput(vp); 3032 } 3033 return ac_list; 3034 } 3035 3036 RF_AutoConfig_t * 3037 rf_find_raid_components(void) 3038 { 3039 struct vnode *vp; 3040 struct disklabel label; 3041 device_t dv; 3042 deviter_t di; 3043 dev_t dev; 3044 int bmajor, bminor, wedge, rf_part_found; 3045 int error; 3046 int i; 3047 RF_AutoConfig_t *ac_list; 3048 uint64_t numsecs; 3049 unsigned secsize; 3050 3051 /* initialize the AutoConfig list */ 3052 ac_list = NULL; 3053 3054 /* we begin by trolling through *all* the devices on the system */ 3055 3056 for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL; 3057 dv = deviter_next(&di)) { 3058 3059 /* we are only interested in disks... */ 3060 if (device_class(dv) != DV_DISK) 3061 continue; 3062 3063 /* we don't care about floppies... */ 3064 if (device_is_a(dv, "fd")) { 3065 continue; 3066 } 3067 3068 /* we don't care about CD's... */ 3069 if (device_is_a(dv, "cd")) { 3070 continue; 3071 } 3072 3073 /* we don't care about md's... 
*/ 3074 if (device_is_a(dv, "md")) { 3075 continue; 3076 } 3077 3078 /* hdfd is the Atari/Hades floppy driver */ 3079 if (device_is_a(dv, "hdfd")) { 3080 continue; 3081 } 3082 3083 /* fdisa is the Atari/Milan floppy driver */ 3084 if (device_is_a(dv, "fdisa")) { 3085 continue; 3086 } 3087 3088 /* need to find the device_name_to_block_device_major stuff */ 3089 bmajor = devsw_name2blk(device_xname(dv), NULL, 0); 3090 3091 rf_part_found = 0; /*No raid partition as yet*/ 3092 3093 /* get a vnode for the raw partition of this disk */ 3094 3095 wedge = device_is_a(dv, "dk"); 3096 bminor = minor(device_unit(dv)); 3097 dev = wedge ? makedev(bmajor, bminor) : 3098 MAKEDISKDEV(bmajor, bminor, RAW_PART); 3099 if (bdevvp(dev, &vp)) 3100 panic("RAID can't alloc vnode"); 3101 3102 error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED); 3103 3104 if (error) { 3105 /* "Who cares." Continue looking 3106 for something that exists*/ 3107 vput(vp); 3108 continue; 3109 } 3110 3111 error = getdisksize(vp, &numsecs, &secsize); 3112 if (error) { 3113 vput(vp); 3114 continue; 3115 } 3116 if (wedge) { 3117 struct dkwedge_info dkw; 3118 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, 3119 NOCRED); 3120 if (error) { 3121 printf("RAIDframe: can't get wedge info for " 3122 "dev %s (%d)\n", device_xname(dv), error); 3123 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3124 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3125 vput(vp); 3126 continue; 3127 } 3128 3129 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) { 3130 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3131 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3132 vput(vp); 3133 continue; 3134 } 3135 3136 ac_list = rf_get_component(ac_list, dev, vp, 3137 device_xname(dv), dkw.dkw_size, numsecs, secsize); 3138 rf_part_found = 1; /*There is a raid component on this disk*/ 3139 continue; 3140 } 3141 3142 /* Ok, the disk exists. Go get the disklabel. 
*/ 3143 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED); 3144 if (error) { 3145 /* 3146 * XXX can't happen - open() would 3147 * have errored out (or faked up one) 3148 */ 3149 if (error != ENOTTY) 3150 printf("RAIDframe: can't get label for dev " 3151 "%s (%d)\n", device_xname(dv), error); 3152 } 3153 3154 /* don't need this any more. We'll allocate it again 3155 a little later if we really do... */ 3156 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3157 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3158 vput(vp); 3159 3160 if (error) 3161 continue; 3162 3163 rf_part_found = 0; /*No raid partitions yet*/ 3164 for (i = 0; i < label.d_npartitions; i++) { 3165 char cname[sizeof(ac_list->devname)]; 3166 3167 /* We only support partitions marked as RAID */ 3168 if (label.d_partitions[i].p_fstype != FS_RAID) 3169 continue; 3170 3171 dev = MAKEDISKDEV(bmajor, device_unit(dv), i); 3172 if (bdevvp(dev, &vp)) 3173 panic("RAID can't alloc vnode"); 3174 3175 error = VOP_OPEN(vp, FREAD, NOCRED); 3176 if (error) { 3177 /* Whatever... */ 3178 vput(vp); 3179 continue; 3180 } 3181 snprintf(cname, sizeof(cname), "%s%c", 3182 device_xname(dv), 'a' + i); 3183 ac_list = rf_get_component(ac_list, dev, vp, cname, 3184 label.d_partitions[i].p_size, numsecs, secsize); 3185 rf_part_found = 1; /*There is at least one raid partition on this disk*/ 3186 } 3187 3188 /* 3189 *If there is no raid component on this disk, either in a 3190 *disklabel or inside a wedge, check the raw partition as well, 3191 *as it is possible to configure raid components on raw disk 3192 *devices. 3193 */ 3194 3195 if (!rf_part_found) { 3196 char cname[sizeof(ac_list->devname)]; 3197 3198 dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART); 3199 if (bdevvp(dev, &vp)) 3200 panic("RAID can't alloc vnode"); 3201 3202 error = VOP_OPEN(vp, FREAD, NOCRED); 3203 if (error) { 3204 /* Whatever... 
*/ 3205 vput(vp); 3206 continue; 3207 } 3208 snprintf(cname, sizeof(cname), "%s%c", 3209 device_xname(dv), 'a' + RAW_PART); 3210 ac_list = rf_get_component(ac_list, dev, vp, cname, 3211 label.d_partitions[RAW_PART].p_size, numsecs, secsize); 3212 } 3213 } 3214 deviter_release(&di); 3215 return ac_list; 3216 } 3217 3218 3219 int 3220 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs) 3221 { 3222 3223 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || 3224 (clabel->version==RF_COMPONENT_LABEL_VERSION)) && 3225 ((clabel->clean == RF_RAID_CLEAN) || 3226 (clabel->clean == RF_RAID_DIRTY)) && 3227 clabel->row >=0 && 3228 clabel->column >= 0 && 3229 clabel->num_rows > 0 && 3230 clabel->num_columns > 0 && 3231 clabel->row < clabel->num_rows && 3232 clabel->column < clabel->num_columns && 3233 clabel->blockSize > 0 && 3234 /* 3235 * numBlocksHi may contain garbage, but it is ok since 3236 * the type is unsigned. If it is really garbage, 3237 * rf_fix_old_label_size() will fix it. 3238 */ 3239 rf_component_label_numblocks(clabel) > 0) { 3240 /* 3241 * label looks reasonable enough... 3242 * let's make sure it has no old garbage. 3243 */ 3244 if (numsecs) 3245 rf_fix_old_label_size(clabel, numsecs); 3246 return(1); 3247 } 3248 return(0); 3249 } 3250 3251 3252 /* 3253 * For reasons yet unknown, some old component labels have garbage in 3254 * the newer numBlocksHi region, and this causes lossage. Since those 3255 * disks will also have numsecs set to less than 32 bits of sectors, 3256 * we can determine when this corruption has occurred, and fix it. 3257 * 3258 * The exact same problem, with the same unknown reason, happens to 3259 * the partitionSizeHi member as well. 
3260 */ 3261 static void 3262 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs) 3263 { 3264 3265 if (numsecs < ((uint64_t)1 << 32)) { 3266 if (clabel->numBlocksHi) { 3267 printf("WARNING: total sectors < 32 bits, yet " 3268 "numBlocksHi set\n" 3269 "WARNING: resetting numBlocksHi to zero.\n"); 3270 clabel->numBlocksHi = 0; 3271 } 3272 3273 if (clabel->partitionSizeHi) { 3274 printf("WARNING: total sectors < 32 bits, yet " 3275 "partitionSizeHi set\n" 3276 "WARNING: resetting partitionSizeHi to zero.\n"); 3277 clabel->partitionSizeHi = 0; 3278 } 3279 } 3280 } 3281 3282 3283 #ifdef DEBUG 3284 void 3285 rf_print_component_label(RF_ComponentLabel_t *clabel) 3286 { 3287 uint64_t numBlocks; 3288 3289 numBlocks = rf_component_label_numblocks(clabel); 3290 3291 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", 3292 clabel->row, clabel->column, 3293 clabel->num_rows, clabel->num_columns); 3294 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 3295 clabel->version, clabel->serial_number, 3296 clabel->mod_counter); 3297 printf(" Clean: %s Status: %d\n", 3298 clabel->clean ? "Yes" : "No", clabel->status); 3299 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n", 3300 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); 3301 printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n", 3302 (char) clabel->parityConfig, clabel->blockSize, numBlocks); 3303 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No"); 3304 printf(" Contains root partition: %s\n", 3305 clabel->root_partition ? 
"Yes" : "No"); 3306 printf(" Last configured as: raid%d\n", clabel->last_unit); 3307 #if 0 3308 printf(" Config order: %d\n", clabel->config_order); 3309 #endif 3310 3311 } 3312 #endif 3313 3314 RF_ConfigSet_t * 3315 rf_create_auto_sets(RF_AutoConfig_t *ac_list) 3316 { 3317 RF_AutoConfig_t *ac; 3318 RF_ConfigSet_t *config_sets; 3319 RF_ConfigSet_t *cset; 3320 RF_AutoConfig_t *ac_next; 3321 3322 3323 config_sets = NULL; 3324 3325 /* Go through the AutoConfig list, and figure out which components 3326 belong to what sets. */ 3327 ac = ac_list; 3328 while(ac!=NULL) { 3329 /* we're going to putz with ac->next, so save it here 3330 for use at the end of the loop */ 3331 ac_next = ac->next; 3332 3333 if (config_sets == NULL) { 3334 /* will need at least this one... */ 3335 config_sets = (RF_ConfigSet_t *) 3336 malloc(sizeof(RF_ConfigSet_t), 3337 M_RAIDFRAME, M_NOWAIT); 3338 if (config_sets == NULL) { 3339 panic("rf_create_auto_sets: No memory!"); 3340 } 3341 /* this one is easy :) */ 3342 config_sets->ac = ac; 3343 config_sets->next = NULL; 3344 config_sets->rootable = 0; 3345 ac->next = NULL; 3346 } else { 3347 /* which set does this component fit into? */ 3348 cset = config_sets; 3349 while(cset!=NULL) { 3350 if (rf_does_it_fit(cset, ac)) { 3351 /* looks like it matches... */ 3352 ac->next = cset->ac; 3353 cset->ac = ac; 3354 break; 3355 } 3356 cset = cset->next; 3357 } 3358 if (cset==NULL) { 3359 /* didn't find a match above... 
new set..*/ 3360 cset = (RF_ConfigSet_t *) 3361 malloc(sizeof(RF_ConfigSet_t), 3362 M_RAIDFRAME, M_NOWAIT); 3363 if (cset == NULL) { 3364 panic("rf_create_auto_sets: No memory!"); 3365 } 3366 cset->ac = ac; 3367 ac->next = NULL; 3368 cset->next = config_sets; 3369 cset->rootable = 0; 3370 config_sets = cset; 3371 } 3372 } 3373 ac = ac_next; 3374 } 3375 3376 3377 return(config_sets); 3378 } 3379 3380 static int 3381 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac) 3382 { 3383 RF_ComponentLabel_t *clabel1, *clabel2; 3384 3385 /* If this one matches the *first* one in the set, that's good 3386 enough, since the other members of the set would have been 3387 through here too... */ 3388 /* note that we are not checking partitionSize here.. 3389 3390 Note that we are also not checking the mod_counters here. 3391 If everything else matches except the mod_counter, that's 3392 good enough for this test. We will deal with the mod_counters 3393 a little later in the autoconfiguration process. 3394 3395 (clabel1->mod_counter == clabel2->mod_counter) && 3396 3397 The reason we don't check for this is that failed disks 3398 will have lower modification counts. If those disks are 3399 not added to the set they used to belong to, then they will 3400 form their own set, which may result in 2 different sets, 3401 for example, competing to be configured at raid0, and 3402 perhaps competing to be the root filesystem set. If the 3403 wrong ones get configured, or both attempt to become /, 3404 weird behaviour and or serious lossage will occur. Thus we 3405 need to bring them into the fold here, and kick them out at 3406 a later point. 
3407 3408 */ 3409 3410 clabel1 = cset->ac->clabel; 3411 clabel2 = ac->clabel; 3412 if ((clabel1->version == clabel2->version) && 3413 (clabel1->serial_number == clabel2->serial_number) && 3414 (clabel1->num_rows == clabel2->num_rows) && 3415 (clabel1->num_columns == clabel2->num_columns) && 3416 (clabel1->sectPerSU == clabel2->sectPerSU) && 3417 (clabel1->SUsPerPU == clabel2->SUsPerPU) && 3418 (clabel1->SUsPerRU == clabel2->SUsPerRU) && 3419 (clabel1->parityConfig == clabel2->parityConfig) && 3420 (clabel1->maxOutstanding == clabel2->maxOutstanding) && 3421 (clabel1->blockSize == clabel2->blockSize) && 3422 rf_component_label_numblocks(clabel1) == 3423 rf_component_label_numblocks(clabel2) && 3424 (clabel1->autoconfigure == clabel2->autoconfigure) && 3425 (clabel1->root_partition == clabel2->root_partition) && 3426 (clabel1->last_unit == clabel2->last_unit) && 3427 (clabel1->config_order == clabel2->config_order)) { 3428 /* if it get's here, it almost *has* to be a match */ 3429 } else { 3430 /* it's not consistent with somebody in the set.. 3431 punt */ 3432 return(0); 3433 } 3434 /* all was fine.. it must fit... */ 3435 return(1); 3436 } 3437 3438 int 3439 rf_have_enough_components(RF_ConfigSet_t *cset) 3440 { 3441 RF_AutoConfig_t *ac; 3442 RF_AutoConfig_t *auto_config; 3443 RF_ComponentLabel_t *clabel; 3444 int c; 3445 int num_cols; 3446 int num_missing; 3447 int mod_counter; 3448 int mod_counter_found; 3449 int even_pair_failed; 3450 char parity_type; 3451 3452 3453 /* check to see that we have enough 'live' components 3454 of this set. If so, we can configure it if necessary */ 3455 3456 num_cols = cset->ac->clabel->num_columns; 3457 parity_type = cset->ac->clabel->parityConfig; 3458 3459 /* XXX Check for duplicate components!?!?!? */ 3460 3461 /* Determine what the mod_counter is supposed to be for this set. 
*/ 3462 3463 mod_counter_found = 0; 3464 mod_counter = 0; 3465 ac = cset->ac; 3466 while(ac!=NULL) { 3467 if (mod_counter_found==0) { 3468 mod_counter = ac->clabel->mod_counter; 3469 mod_counter_found = 1; 3470 } else { 3471 if (ac->clabel->mod_counter > mod_counter) { 3472 mod_counter = ac->clabel->mod_counter; 3473 } 3474 } 3475 ac = ac->next; 3476 } 3477 3478 num_missing = 0; 3479 auto_config = cset->ac; 3480 3481 even_pair_failed = 0; 3482 for(c=0; c<num_cols; c++) { 3483 ac = auto_config; 3484 while(ac!=NULL) { 3485 if ((ac->clabel->column == c) && 3486 (ac->clabel->mod_counter == mod_counter)) { 3487 /* it's this one... */ 3488 #ifdef DEBUG 3489 printf("Found: %s at %d\n", 3490 ac->devname,c); 3491 #endif 3492 break; 3493 } 3494 ac=ac->next; 3495 } 3496 if (ac==NULL) { 3497 /* Didn't find one here! */ 3498 /* special case for RAID 1, especially 3499 where there are more than 2 3500 components (where RAIDframe treats 3501 things a little differently :( ) */ 3502 if (parity_type == '1') { 3503 if (c%2 == 0) { /* even component */ 3504 even_pair_failed = 1; 3505 } else { /* odd component. If 3506 we're failed, and 3507 so is the even 3508 component, it's 3509 "Good Night, Charlie" */ 3510 if (even_pair_failed == 1) { 3511 return(0); 3512 } 3513 } 3514 } else { 3515 /* normal accounting */ 3516 num_missing++; 3517 } 3518 } 3519 if ((parity_type == '1') && (c%2 == 1)) { 3520 /* Just did an even component, and we didn't 3521 bail.. reset the even_pair_failed flag, 3522 and go on to the next component.... 
*/ 3523 even_pair_failed = 0; 3524 } 3525 } 3526 3527 clabel = cset->ac->clabel; 3528 3529 if (((clabel->parityConfig == '0') && (num_missing > 0)) || 3530 ((clabel->parityConfig == '4') && (num_missing > 1)) || 3531 ((clabel->parityConfig == '5') && (num_missing > 1))) { 3532 /* XXX this needs to be made *much* more general */ 3533 /* Too many failures */ 3534 return(0); 3535 } 3536 /* otherwise, all is well, and we've got enough to take a kick 3537 at autoconfiguring this set */ 3538 return(1); 3539 } 3540 3541 void 3542 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config, 3543 RF_Raid_t *raidPtr) 3544 { 3545 RF_ComponentLabel_t *clabel; 3546 int i; 3547 3548 clabel = ac->clabel; 3549 3550 /* 1. Fill in the common stuff */ 3551 config->numRow = clabel->num_rows = 1; 3552 config->numCol = clabel->num_columns; 3553 config->numSpare = 0; /* XXX should this be set here? */ 3554 config->sectPerSU = clabel->sectPerSU; 3555 config->SUsPerPU = clabel->SUsPerPU; 3556 config->SUsPerRU = clabel->SUsPerRU; 3557 config->parityConfig = clabel->parityConfig; 3558 /* XXX... */ 3559 strcpy(config->diskQueueType,"fifo"); 3560 config->maxOutstandingDiskReqs = clabel->maxOutstanding; 3561 config->layoutSpecificSize = 0; /* XXX ?? 
*/ 3562 3563 while(ac!=NULL) { 3564 /* row/col values will be in range due to the checks 3565 in reasonable_label() */ 3566 strcpy(config->devnames[0][ac->clabel->column], 3567 ac->devname); 3568 ac = ac->next; 3569 } 3570 3571 for(i=0;i<RF_MAXDBGV;i++) { 3572 config->debugVars[i][0] = 0; 3573 } 3574 } 3575 3576 int 3577 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value) 3578 { 3579 RF_ComponentLabel_t *clabel; 3580 int column; 3581 int sparecol; 3582 3583 raidPtr->autoconfigure = new_value; 3584 3585 for(column=0; column<raidPtr->numCol; column++) { 3586 if (raidPtr->Disks[column].status == rf_ds_optimal) { 3587 clabel = raidget_component_label(raidPtr, column); 3588 clabel->autoconfigure = new_value; 3589 raidflush_component_label(raidPtr, column); 3590 } 3591 } 3592 for(column = 0; column < raidPtr->numSpare ; column++) { 3593 sparecol = raidPtr->numCol + column; 3594 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3595 clabel = raidget_component_label(raidPtr, sparecol); 3596 clabel->autoconfigure = new_value; 3597 raidflush_component_label(raidPtr, sparecol); 3598 } 3599 } 3600 return(new_value); 3601 } 3602 3603 int 3604 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value) 3605 { 3606 RF_ComponentLabel_t *clabel; 3607 int column; 3608 int sparecol; 3609 3610 raidPtr->root_partition = new_value; 3611 for(column=0; column<raidPtr->numCol; column++) { 3612 if (raidPtr->Disks[column].status == rf_ds_optimal) { 3613 clabel = raidget_component_label(raidPtr, column); 3614 clabel->root_partition = new_value; 3615 raidflush_component_label(raidPtr, column); 3616 } 3617 } 3618 for(column = 0; column < raidPtr->numSpare ; column++) { 3619 sparecol = raidPtr->numCol + column; 3620 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3621 clabel = raidget_component_label(raidPtr, sparecol); 3622 clabel->root_partition = new_value; 3623 raidflush_component_label(raidPtr, sparecol); 3624 } 3625 } 3626 return(new_value); 3627 } 3628 3629 void 3630 
rf_release_all_vps(RF_ConfigSet_t *cset) 3631 { 3632 RF_AutoConfig_t *ac; 3633 3634 ac = cset->ac; 3635 while(ac!=NULL) { 3636 /* Close the vp, and give it back */ 3637 if (ac->vp) { 3638 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); 3639 VOP_CLOSE(ac->vp, FREAD, NOCRED); 3640 vput(ac->vp); 3641 ac->vp = NULL; 3642 } 3643 ac = ac->next; 3644 } 3645 } 3646 3647 3648 void 3649 rf_cleanup_config_set(RF_ConfigSet_t *cset) 3650 { 3651 RF_AutoConfig_t *ac; 3652 RF_AutoConfig_t *next_ac; 3653 3654 ac = cset->ac; 3655 while(ac!=NULL) { 3656 next_ac = ac->next; 3657 /* nuke the label */ 3658 free(ac->clabel, M_RAIDFRAME); 3659 /* cleanup the config structure */ 3660 free(ac, M_RAIDFRAME); 3661 /* "next.." */ 3662 ac = next_ac; 3663 } 3664 /* and, finally, nuke the config set */ 3665 free(cset, M_RAIDFRAME); 3666 } 3667 3668 3669 void 3670 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel) 3671 { 3672 /* current version number */ 3673 clabel->version = RF_COMPONENT_LABEL_VERSION; 3674 clabel->serial_number = raidPtr->serial_number; 3675 clabel->mod_counter = raidPtr->mod_counter; 3676 3677 clabel->num_rows = 1; 3678 clabel->num_columns = raidPtr->numCol; 3679 clabel->clean = RF_RAID_DIRTY; /* not clean */ 3680 clabel->status = rf_ds_optimal; /* "It's good!" 
*/ 3681 3682 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; 3683 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU; 3684 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU; 3685 3686 clabel->blockSize = raidPtr->bytesPerSector; 3687 rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk); 3688 3689 /* XXX not portable */ 3690 clabel->parityConfig = raidPtr->Layout.map->parityConfig; 3691 clabel->maxOutstanding = raidPtr->maxOutstanding; 3692 clabel->autoconfigure = raidPtr->autoconfigure; 3693 clabel->root_partition = raidPtr->root_partition; 3694 clabel->last_unit = raidPtr->raidid; 3695 clabel->config_order = raidPtr->config_order; 3696 3697 #ifndef RF_NO_PARITY_MAP 3698 rf_paritymap_init_label(raidPtr->parity_map, clabel); 3699 #endif 3700 } 3701 3702 int 3703 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit) 3704 { 3705 RF_Raid_t *raidPtr; 3706 RF_Config_t *config; 3707 int raidID; 3708 int retcode; 3709 3710 #ifdef DEBUG 3711 printf("RAID autoconfigure\n"); 3712 #endif 3713 3714 retcode = 0; 3715 *unit = -1; 3716 3717 /* 1. Create a config structure */ 3718 3719 config = (RF_Config_t *)malloc(sizeof(RF_Config_t), 3720 M_RAIDFRAME, 3721 M_NOWAIT); 3722 if (config==NULL) { 3723 printf("Out of mem!?!?\n"); 3724 /* XXX do something more intelligent here. */ 3725 return(1); 3726 } 3727 3728 memset(config, 0, sizeof(RF_Config_t)); 3729 3730 /* 3731 2. Figure out what RAID ID this one is supposed to live at 3732 See if we can get the same RAID dev that it was configured 3733 on last time.. 3734 */ 3735 3736 raidID = cset->ac->clabel->last_unit; 3737 if ((raidID < 0) || (raidID >= numraid)) { 3738 /* let's not wander off into lala land. */ 3739 raidID = numraid - 1; 3740 } 3741 if (raidPtrs[raidID]->valid != 0) { 3742 3743 /* 3744 Nope... Go looking for an alternative... 3745 Start high so we don't immediately use raid0 if that's 3746 not taken. 
3747 */ 3748 3749 for(raidID = numraid - 1; raidID >= 0; raidID--) { 3750 if (raidPtrs[raidID]->valid == 0) { 3751 /* can use this one! */ 3752 break; 3753 } 3754 } 3755 } 3756 3757 if (raidID < 0) { 3758 /* punt... */ 3759 printf("Unable to auto configure this set!\n"); 3760 printf("(Out of RAID devs!)\n"); 3761 free(config, M_RAIDFRAME); 3762 return(1); 3763 } 3764 3765 #ifdef DEBUG 3766 printf("Configuring raid%d:\n",raidID); 3767 #endif 3768 3769 raidPtr = raidPtrs[raidID]; 3770 3771 /* XXX all this stuff should be done SOMEWHERE ELSE! */ 3772 raidPtr->raidid = raidID; 3773 raidPtr->openings = RAIDOUTSTANDING; 3774 3775 /* 3. Build the configuration structure */ 3776 rf_create_configuration(cset->ac, config, raidPtr); 3777 3778 /* 4. Do the configuration */ 3779 retcode = rf_Configure(raidPtr, config, cset->ac); 3780 3781 if (retcode == 0) { 3782 3783 raidinit(raidPtrs[raidID]); 3784 3785 rf_markalldirty(raidPtrs[raidID]); 3786 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */ 3787 if (cset->ac->clabel->root_partition==1) { 3788 /* everything configured just fine. Make a note 3789 that this set is eligible to be root. */ 3790 cset->rootable = 1; 3791 /* XXX do this here? */ 3792 raidPtrs[raidID]->root_partition = 1; 3793 } 3794 } 3795 3796 /* 5. 
Cleanup */ 3797 free(config, M_RAIDFRAME); 3798 3799 *unit = raidID; 3800 return(retcode); 3801 } 3802 3803 void 3804 rf_disk_unbusy(RF_RaidAccessDesc_t *desc) 3805 { 3806 struct buf *bp; 3807 3808 bp = (struct buf *)desc->bp; 3809 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev, 3810 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ)); 3811 } 3812 3813 void 3814 rf_pool_init(struct pool *p, size_t size, const char *w_chan, 3815 size_t xmin, size_t xmax) 3816 { 3817 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO); 3818 pool_sethiwat(p, xmax); 3819 pool_prime(p, xmin); 3820 pool_setlowat(p, xmin); 3821 } 3822 3823 /* 3824 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see 3825 * if there is IO pending and if that IO could possibly be done for a 3826 * given RAID set. Returns 0 if IO is waiting and can be done, 1 3827 * otherwise. 3828 * 3829 */ 3830 3831 int 3832 rf_buf_queue_check(int raidid) 3833 { 3834 if ((bufq_peek(raid_softc[raidid].buf_queue) != NULL) && 3835 raidPtrs[raidid]->openings > 0) { 3836 /* there is work to do */ 3837 return 0; 3838 } 3839 /* default is nothing to do */ 3840 return 1; 3841 } 3842 3843 int 3844 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr) 3845 { 3846 uint64_t numsecs; 3847 unsigned secsize; 3848 int error; 3849 3850 error = getdisksize(vp, &numsecs, &secsize); 3851 if (error == 0) { 3852 diskPtr->blockSize = secsize; 3853 diskPtr->numBlocks = numsecs - rf_protectedSectors; 3854 diskPtr->partitionSize = numsecs; 3855 return 0; 3856 } 3857 return error; 3858 } 3859 3860 static int 3861 raid_match(device_t self, cfdata_t cfdata, void *aux) 3862 { 3863 return 1; 3864 } 3865 3866 static void 3867 raid_attach(device_t parent, device_t self, void *aux) 3868 { 3869 3870 } 3871 3872 3873 static int 3874 raid_detach(device_t self, int flags) 3875 { 3876 int error; 3877 struct raid_softc *rs = &raid_softc[device_unit(self)]; 3878 3879 if ((error = raidlock(rs)) != 0) 3880 return (error); 3881 3882 error 
= raid_detach_unlocked(rs); 3883 3884 raidunlock(rs); 3885 3886 return error; 3887 } 3888 3889 static void 3890 rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr) 3891 { 3892 prop_dictionary_t disk_info, odisk_info, geom; 3893 disk_info = prop_dictionary_create(); 3894 geom = prop_dictionary_create(); 3895 prop_dictionary_set_uint64(geom, "sectors-per-unit", 3896 raidPtr->totalSectors); 3897 prop_dictionary_set_uint32(geom, "sector-size", 3898 raidPtr->bytesPerSector); 3899 3900 prop_dictionary_set_uint16(geom, "sectors-per-track", 3901 raidPtr->Layout.dataSectorsPerStripe); 3902 prop_dictionary_set_uint16(geom, "tracks-per-cylinder", 3903 4 * raidPtr->numCol); 3904 3905 prop_dictionary_set_uint64(geom, "cylinders-per-unit", 3906 raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe * 3907 (4 * raidPtr->numCol))); 3908 3909 prop_dictionary_set(disk_info, "geometry", geom); 3910 prop_object_release(geom); 3911 prop_dictionary_set(device_properties(rs->sc_dev), 3912 "disk-info", disk_info); 3913 odisk_info = rs->sc_dkdev.dk_info; 3914 rs->sc_dkdev.dk_info = disk_info; 3915 if (odisk_info) 3916 prop_object_release(odisk_info); 3917 } 3918 3919 /* 3920 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components. 3921 * We end up returning whatever error was returned by the first cache flush 3922 * that fails. 
3923 */ 3924 3925 int 3926 rf_sync_component_caches(RF_Raid_t *raidPtr) 3927 { 3928 int c, sparecol; 3929 int e,error; 3930 int force = 1; 3931 3932 error = 0; 3933 for (c = 0; c < raidPtr->numCol; c++) { 3934 if (raidPtr->Disks[c].status == rf_ds_optimal) { 3935 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC, 3936 &force, FWRITE, NOCRED); 3937 if (e) { 3938 if (e != ENODEV) 3939 printf("raid%d: cache flush to component %s failed.\n", 3940 raidPtr->raidid, raidPtr->Disks[c].devname); 3941 if (error == 0) { 3942 error = e; 3943 } 3944 } 3945 } 3946 } 3947 3948 for( c = 0; c < raidPtr->numSpare ; c++) { 3949 sparecol = raidPtr->numCol + c; 3950 /* Need to ensure that the reconstruct actually completed! */ 3951 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3952 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp, 3953 DIOCCACHESYNC, &force, FWRITE, NOCRED); 3954 if (e) { 3955 if (e != ENODEV) 3956 printf("raid%d: cache flush to component %s failed.\n", 3957 raidPtr->raidid, raidPtr->Disks[sparecol].devname); 3958 if (error == 0) { 3959 error = e; 3960 } 3961 } 3962 } 3963 } 3964 return error; 3965 } 3966