1 /* $NetBSD: rf_netbsdkintf.c,v 1.356 2018/01/23 22:42:29 pgoyette Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Greg Oster; Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 
40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 66 * 67 * @(#)cd.c 8.2 (Berkeley) 11/16/93 68 */ 69 70 /* 71 * Copyright (c) 1995 Carnegie-Mellon University. 72 * All rights reserved. 
73 * 74 * Authors: Mark Holland, Jim Zelenka 75 * 76 * Permission to use, copy, modify and distribute this software and 77 * its documentation is hereby granted, provided that both the copyright 78 * notice and this permission notice appear in all copies of the 79 * software, derivative works or modified versions, and any portions 80 * thereof, and that both notices appear in supporting documentation. 81 * 82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 85 * 86 * Carnegie Mellon requests users of this software to return to 87 * 88 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 89 * School of Computer Science 90 * Carnegie Mellon University 91 * Pittsburgh PA 15213-3890 92 * 93 * any improvements or extensions that they make and grant Carnegie the 94 * rights to redistribute these changes. 95 */ 96 97 /*********************************************************** 98 * 99 * rf_kintf.c -- the kernel interface routines for RAIDframe 100 * 101 ***********************************************************/ 102 103 #include <sys/cdefs.h> 104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.356 2018/01/23 22:42:29 pgoyette Exp $"); 105 106 #ifdef _KERNEL_OPT 107 #include "opt_compat_netbsd.h" 108 #include "opt_compat_netbsd32.h" 109 #include "opt_raid_autoconfig.h" 110 #endif 111 112 #include <sys/param.h> 113 #include <sys/errno.h> 114 #include <sys/pool.h> 115 #include <sys/proc.h> 116 #include <sys/queue.h> 117 #include <sys/disk.h> 118 #include <sys/device.h> 119 #include <sys/stat.h> 120 #include <sys/ioctl.h> 121 #include <sys/fcntl.h> 122 #include <sys/systm.h> 123 #include <sys/vnode.h> 124 #include <sys/disklabel.h> 125 #include <sys/conf.h> 126 #include <sys/buf.h> 127 #include <sys/bufq.h> 128 #include <sys/reboot.h> 129 #include <sys/kauth.h> 130 #include <sys/module.h> 131 132 
#include <prop/proplib.h>

#include <dev/raidframe/raidframevar.h>
#include <dev/raidframe/raidframeio.h>
#include <dev/raidframe/rf_paritymap.h>

#include "rf_raid.h"
#include "rf_copyback.h"
#include "rf_dag.h"
#include "rf_dagflags.h"
#include "rf_desc.h"
#include "rf_diskqueue.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_kintf.h"
#include "rf_options.h"
#include "rf_driver.h"
#include "rf_parityscan.h"
#include "rf_threadstuff.h"

#ifdef COMPAT_50
#include "rf_compat50.h"
#endif

#ifdef COMPAT_80
#include "rf_compat80.h"
#endif

#ifdef COMPAT_NETBSD32
#include "rf_compat32.h"
#endif

#include "ioconf.h"

#ifdef DEBUG
int	rf_kdebug_level = 0;
#define	db1_printf(a)	if (rf_kdebug_level > 0) printf a
#else				/* DEBUG */
#define	db1_printf(a)	{ }
#endif				/* DEBUG */

#ifdef DEBUG_ROOT
#define	DPRINTF(a, ...)	printf(a, __VA_ARGS__)
#else
#define	DPRINTF(a, ...)
#endif

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
static rf_declare_mutex2(rf_sparet_wait_mutex);
static rf_declare_cond2(rf_sparet_wait_cv);
static rf_declare_cond2(rf_sparet_resp_cv);

/* Requests to install a spare table. */
static RF_SparetWait_t *rf_sparet_wait_queue;
/* Responses from the installation process. */
static RF_SparetWait_t *rf_sparet_resp_queue;
#endif

MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");

/* prototypes */
static void KernelWakeupFunc(struct buf *);
static void InitBP(struct buf *, struct vnode *, unsigned,
    dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    void *, int, struct proc *);
struct raid_softc;
static void raidinit(struct raid_softc *);
static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);

static int raid_match(device_t, cfdata_t, void *);
static void raid_attach(device_t, device_t, void *);
static int raid_detach(device_t, int);

static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t);
static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t, int);

static int raidwrite_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);
static int raidread_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);

static int raid_diskstart(device_t, struct buf *bp);
static int raid_dumpblocks(device_t, void *, daddr_t, int);
static int raid_lastclose(device_t);

static dev_type_open(raidopen);
static dev_type_close(raidclose);
static dev_type_read(raidread);
static dev_type_write(raidwrite);
static dev_type_ioctl(raidioctl);
static dev_type_strategy(raidstrategy);
static dev_type_dump(raiddump);
static dev_type_size(raidsize);

const struct bdevsw raid_bdevsw = { 230 .d_open = raidopen, 231 .d_close = raidclose, 232 .d_strategy = raidstrategy, 233 .d_ioctl = raidioctl, 234 .d_dump = raiddump, 235 .d_psize = raidsize, 236 .d_discard = nodiscard, 237 .d_flag = D_DISK 238 }; 239 240 const struct cdevsw raid_cdevsw = { 241 .d_open = raidopen, 242 .d_close = raidclose, 243 .d_read = raidread, 244 .d_write = raidwrite, 245 .d_ioctl = raidioctl, 246 .d_stop = nostop, 247 .d_tty = notty, 248 .d_poll = nopoll, 249 .d_mmap = nommap, 250 .d_kqfilter = nokqfilter, 251 .d_discard = nodiscard, 252 .d_flag = D_DISK 253 }; 254 255 static struct dkdriver rf_dkdriver = { 256 .d_open = raidopen, 257 .d_close = raidclose, 258 .d_strategy = raidstrategy, 259 .d_diskstart = raid_diskstart, 260 .d_dumpblocks = raid_dumpblocks, 261 .d_lastclose = raid_lastclose, 262 .d_minphys = minphys 263 }; 264 265 struct raid_softc { 266 struct dk_softc sc_dksc; 267 int sc_unit; 268 int sc_flags; /* flags */ 269 int sc_cflags; /* configuration flags */ 270 kmutex_t sc_mutex; /* interlock mutex */ 271 kcondvar_t sc_cv; /* and the condvar */ 272 uint64_t sc_size; /* size of the raid device */ 273 char sc_xname[20]; /* XXX external name */ 274 RF_Raid_t sc_r; 275 LIST_ENTRY(raid_softc) sc_link; 276 }; 277 /* sc_flags */ 278 #define RAIDF_INITED 0x01 /* unit has been initialized */ 279 #define RAIDF_SHUTDOWN 0x02 /* unit is being shutdown */ 280 #define RAIDF_DETACH 0x04 /* detach after final close */ 281 #define RAIDF_WANTED 0x08 /* someone waiting to obtain a lock */ 282 #define RAIDF_LOCKED 0x10 /* unit is locked */ 283 #define RAIDF_UNIT_CHANGED 0x20 /* unit is being changed */ 284 285 #define raidunit(x) DISKUNIT(x) 286 #define raidsoftc(dev) (((struct raid_softc *)device_private(dev))->sc_r.softc) 287 288 extern struct cfdriver raid_cd; 289 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc), 290 raid_match, raid_attach, raid_detach, NULL, NULL, NULL, 291 DVF_DETACH_SHUTDOWN); 292 293 /* Internal representation of a 
rf_recon_req */ 294 struct rf_recon_req_internal { 295 RF_RowCol_t col; 296 RF_ReconReqFlags_t flags; 297 void *raidPtr; 298 }; 299 300 /* 301 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device. 302 * Be aware that large numbers can allow the driver to consume a lot of 303 * kernel memory, especially on writes, and in degraded mode reads. 304 * 305 * For example: with a stripe width of 64 blocks (32k) and 5 disks, 306 * a single 64K write will typically require 64K for the old data, 307 * 64K for the old parity, and 64K for the new parity, for a total 308 * of 192K (if the parity buffer is not re-used immediately). 309 * Even it if is used immediately, that's still 128K, which when multiplied 310 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data. 311 * 312 * Now in degraded mode, for example, a 64K read on the above setup may 313 * require data reconstruction, which will require *all* of the 4 remaining 314 * disks to participate -- 4 * 32K/disk == 128K again. 315 */ 316 317 #ifndef RAIDOUTSTANDING 318 #define RAIDOUTSTANDING 6 319 #endif 320 321 #define RAIDLABELDEV(dev) \ 322 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART)) 323 324 /* declared here, and made public, for the benefit of KVM stuff.. 
*/ 325 326 static int raidlock(struct raid_softc *); 327 static void raidunlock(struct raid_softc *); 328 329 static int raid_detach_unlocked(struct raid_softc *); 330 331 static void rf_markalldirty(RF_Raid_t *); 332 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *); 333 334 void rf_ReconThread(struct rf_recon_req_internal *); 335 void rf_RewriteParityThread(RF_Raid_t *raidPtr); 336 void rf_CopybackThread(RF_Raid_t *raidPtr); 337 void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *); 338 int rf_autoconfig(device_t); 339 void rf_buildroothack(RF_ConfigSet_t *); 340 341 RF_AutoConfig_t *rf_find_raid_components(void); 342 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *); 343 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *); 344 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t); 345 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *); 346 int rf_set_autoconfig(RF_Raid_t *, int); 347 int rf_set_rootpartition(RF_Raid_t *, int); 348 void rf_release_all_vps(RF_ConfigSet_t *); 349 void rf_cleanup_config_set(RF_ConfigSet_t *); 350 int rf_have_enough_components(RF_ConfigSet_t *); 351 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *); 352 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t); 353 354 /* 355 * Debugging, mostly. Set to 0 to not allow autoconfig to take place. 356 * Note that this is overridden by having RAID_AUTOCONFIG as an option 357 * in the kernel config file. 
358 */ 359 #ifdef RAID_AUTOCONFIG 360 int raidautoconfig = 1; 361 #else 362 int raidautoconfig = 0; 363 #endif 364 static bool raidautoconfigdone = false; 365 366 struct RF_Pools_s rf_pools; 367 368 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids); 369 static kmutex_t raid_lock; 370 371 static struct raid_softc * 372 raidcreate(int unit) { 373 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP); 374 sc->sc_unit = unit; 375 cv_init(&sc->sc_cv, "raidunit"); 376 mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE); 377 return sc; 378 } 379 380 static void 381 raiddestroy(struct raid_softc *sc) { 382 cv_destroy(&sc->sc_cv); 383 mutex_destroy(&sc->sc_mutex); 384 kmem_free(sc, sizeof(*sc)); 385 } 386 387 static struct raid_softc * 388 raidget(int unit, bool create) { 389 struct raid_softc *sc; 390 if (unit < 0) { 391 #ifdef DIAGNOSTIC 392 panic("%s: unit %d!", __func__, unit); 393 #endif 394 return NULL; 395 } 396 mutex_enter(&raid_lock); 397 LIST_FOREACH(sc, &raids, sc_link) { 398 if (sc->sc_unit == unit) { 399 mutex_exit(&raid_lock); 400 return sc; 401 } 402 } 403 mutex_exit(&raid_lock); 404 if (!create) 405 return NULL; 406 if ((sc = raidcreate(unit)) == NULL) 407 return NULL; 408 mutex_enter(&raid_lock); 409 LIST_INSERT_HEAD(&raids, sc, sc_link); 410 mutex_exit(&raid_lock); 411 return sc; 412 } 413 414 static void 415 raidput(struct raid_softc *sc) { 416 mutex_enter(&raid_lock); 417 LIST_REMOVE(sc, sc_link); 418 mutex_exit(&raid_lock); 419 raiddestroy(sc); 420 } 421 422 void 423 raidattach(int num) 424 { 425 426 /* 427 * Device attachment and associated initialization now occurs 428 * as part of the module initialization. 429 */ 430 } 431 432 int 433 rf_autoconfig(device_t self) 434 { 435 RF_AutoConfig_t *ac_list; 436 RF_ConfigSet_t *config_sets; 437 438 if (!raidautoconfig || raidautoconfigdone == true) 439 return (0); 440 441 /* XXX This code can only be run once. 
*/ 442 raidautoconfigdone = true; 443 444 #ifdef __HAVE_CPU_BOOTCONF 445 /* 446 * 0. find the boot device if needed first so we can use it later 447 * this needs to be done before we autoconfigure any raid sets, 448 * because if we use wedges we are not going to be able to open 449 * the boot device later 450 */ 451 if (booted_device == NULL) 452 cpu_bootconf(); 453 #endif 454 /* 1. locate all RAID components on the system */ 455 aprint_debug("Searching for RAID components...\n"); 456 ac_list = rf_find_raid_components(); 457 458 /* 2. Sort them into their respective sets. */ 459 config_sets = rf_create_auto_sets(ac_list); 460 461 /* 462 * 3. Evaluate each set and configure the valid ones. 463 * This gets done in rf_buildroothack(). 464 */ 465 rf_buildroothack(config_sets); 466 467 return 1; 468 } 469 470 static int 471 rf_containsboot(RF_Raid_t *r, device_t bdv) { 472 const char *bootname = device_xname(bdv); 473 size_t len = strlen(bootname); 474 475 for (int col = 0; col < r->numCol; col++) { 476 const char *devname = r->Disks[col].devname; 477 devname += sizeof("/dev/") - 1; 478 if (strncmp(devname, "dk", 2) == 0) { 479 const char *parent = 480 dkwedge_get_parent_name(r->Disks[col].dev); 481 if (parent != NULL) 482 devname = parent; 483 } 484 if (strncmp(devname, bootname, len) == 0) { 485 struct raid_softc *sc = r->softc; 486 aprint_debug("raid%d includes boot device %s\n", 487 sc->sc_unit, devname); 488 return 1; 489 } 490 } 491 return 0; 492 } 493 494 void 495 rf_buildroothack(RF_ConfigSet_t *config_sets) 496 { 497 RF_ConfigSet_t *cset; 498 RF_ConfigSet_t *next_cset; 499 int num_root; 500 struct raid_softc *sc, *rsc; 501 struct dk_softc *dksc; 502 503 sc = rsc = NULL; 504 num_root = 0; 505 cset = config_sets; 506 while (cset != NULL) { 507 next_cset = cset->next; 508 if (rf_have_enough_components(cset) && 509 cset->ac->clabel->autoconfigure == 1) { 510 sc = rf_auto_config_set(cset); 511 if (sc != NULL) { 512 aprint_debug("raid%d: configured ok\n", 513 
sc->sc_unit); 514 if (cset->rootable) { 515 rsc = sc; 516 num_root++; 517 } 518 } else { 519 /* The autoconfig didn't work :( */ 520 aprint_debug("Autoconfig failed\n"); 521 rf_release_all_vps(cset); 522 } 523 } else { 524 /* we're not autoconfiguring this set... 525 release the associated resources */ 526 rf_release_all_vps(cset); 527 } 528 /* cleanup */ 529 rf_cleanup_config_set(cset); 530 cset = next_cset; 531 } 532 dksc = &rsc->sc_dksc; 533 534 /* if the user has specified what the root device should be 535 then we don't touch booted_device or boothowto... */ 536 537 if (rootspec != NULL) 538 return; 539 540 /* we found something bootable... */ 541 542 /* 543 * XXX: The following code assumes that the root raid 544 * is the first ('a') partition. This is about the best 545 * we can do with a BSD disklabel, but we might be able 546 * to do better with a GPT label, by setting a specified 547 * attribute to indicate the root partition. We can then 548 * stash the partition number in the r->root_partition 549 * high bits (the bottom 2 bits are already used). For 550 * now we just set booted_partition to 0 when we override 551 * root. 552 */ 553 if (num_root == 1) { 554 device_t candidate_root; 555 if (dksc->sc_dkdev.dk_nwedges != 0) { 556 char cname[sizeof(cset->ac->devname)]; 557 /* XXX: assume partition 'a' first */ 558 snprintf(cname, sizeof(cname), "%s%c", 559 device_xname(dksc->sc_dev), 'a'); 560 candidate_root = dkwedge_find_by_wname(cname); 561 DPRINTF("%s: candidate wedge root=%s\n", __func__, 562 cname); 563 if (candidate_root == NULL) { 564 /* 565 * If that is not found, because we don't use 566 * disklabel, return the first dk child 567 * XXX: we can skip the 'a' check above 568 * and always do this... 
569 */ 570 size_t i = 0; 571 candidate_root = dkwedge_find_by_parent( 572 device_xname(dksc->sc_dev), &i); 573 } 574 DPRINTF("%s: candidate wedge root=%p\n", __func__, 575 candidate_root); 576 } else 577 candidate_root = dksc->sc_dev; 578 DPRINTF("%s: candidate root=%p\n", __func__, candidate_root); 579 DPRINTF("%s: booted_device=%p root_partition=%d " 580 "contains_boot=%d\n", __func__, booted_device, 581 rsc->sc_r.root_partition, 582 rf_containsboot(&rsc->sc_r, booted_device)); 583 if (booted_device == NULL || 584 rsc->sc_r.root_partition == 1 || 585 rf_containsboot(&rsc->sc_r, booted_device)) { 586 booted_device = candidate_root; 587 booted_method = "raidframe/single"; 588 booted_partition = 0; /* XXX assume 'a' */ 589 } 590 } else if (num_root > 1) { 591 DPRINTF("%s: many roots=%d, %p\n", __func__, num_root, 592 booted_device); 593 594 /* 595 * Maybe the MD code can help. If it cannot, then 596 * setroot() will discover that we have no 597 * booted_device and will ask the user if nothing was 598 * hardwired in the kernel config file 599 */ 600 if (booted_device == NULL) 601 return; 602 603 num_root = 0; 604 mutex_enter(&raid_lock); 605 LIST_FOREACH(sc, &raids, sc_link) { 606 RF_Raid_t *r = &sc->sc_r; 607 if (r->valid == 0) 608 continue; 609 610 if (r->root_partition == 0) 611 continue; 612 613 if (rf_containsboot(r, booted_device)) { 614 num_root++; 615 rsc = sc; 616 dksc = &rsc->sc_dksc; 617 } 618 } 619 mutex_exit(&raid_lock); 620 621 if (num_root == 1) { 622 booted_device = dksc->sc_dev; 623 booted_method = "raidframe/multi"; 624 booted_partition = 0; /* XXX assume 'a' */ 625 } else { 626 /* we can't guess.. require the user to answer... 
*/ 627 boothowto |= RB_ASKNAME; 628 } 629 } 630 } 631 632 static int 633 raidsize(dev_t dev) 634 { 635 struct raid_softc *rs; 636 struct dk_softc *dksc; 637 unsigned int unit; 638 639 unit = raidunit(dev); 640 if ((rs = raidget(unit, false)) == NULL) 641 return -1; 642 dksc = &rs->sc_dksc; 643 644 if ((rs->sc_flags & RAIDF_INITED) == 0) 645 return -1; 646 647 return dk_size(dksc, dev); 648 } 649 650 static int 651 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size) 652 { 653 unsigned int unit; 654 struct raid_softc *rs; 655 struct dk_softc *dksc; 656 657 unit = raidunit(dev); 658 if ((rs = raidget(unit, false)) == NULL) 659 return ENXIO; 660 dksc = &rs->sc_dksc; 661 662 if ((rs->sc_flags & RAIDF_INITED) == 0) 663 return ENODEV; 664 665 /* 666 Note that blkno is relative to this particular partition. 667 By adding adding RF_PROTECTED_SECTORS, we get a value that 668 is relative to the partition used for the underlying component. 669 */ 670 blkno += RF_PROTECTED_SECTORS; 671 672 return dk_dump(dksc, dev, blkno, va, size); 673 } 674 675 static int 676 raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk) 677 { 678 struct raid_softc *rs = raidsoftc(dev); 679 const struct bdevsw *bdev; 680 RF_Raid_t *raidPtr; 681 int c, sparecol, j, scol, dumpto; 682 int error = 0; 683 684 raidPtr = &rs->sc_r; 685 686 /* we only support dumping to RAID 1 sets */ 687 if (raidPtr->Layout.numDataCol != 1 || 688 raidPtr->Layout.numParityCol != 1) 689 return EINVAL; 690 691 if ((error = raidlock(rs)) != 0) 692 return error; 693 694 /* figure out what device is alive.. */ 695 696 /* 697 Look for a component to dump to. 
The preference for the 698 component to dump to is as follows: 699 1) the master 700 2) a used_spare of the master 701 3) the slave 702 4) a used_spare of the slave 703 */ 704 705 dumpto = -1; 706 for (c = 0; c < raidPtr->numCol; c++) { 707 if (raidPtr->Disks[c].status == rf_ds_optimal) { 708 /* this might be the one */ 709 dumpto = c; 710 break; 711 } 712 } 713 714 /* 715 At this point we have possibly selected a live master or a 716 live slave. We now check to see if there is a spared 717 master (or a spared slave), if we didn't find a live master 718 or a live slave. 719 */ 720 721 for (c = 0; c < raidPtr->numSpare; c++) { 722 sparecol = raidPtr->numCol + c; 723 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 724 /* How about this one? */ 725 scol = -1; 726 for(j=0;j<raidPtr->numCol;j++) { 727 if (raidPtr->Disks[j].spareCol == sparecol) { 728 scol = j; 729 break; 730 } 731 } 732 if (scol == 0) { 733 /* 734 We must have found a spared master! 735 We'll take that over anything else 736 found so far. (We couldn't have 737 found a real master before, since 738 this is a used spare, and it's 739 saying that it's replacing the 740 master.) On reboot (with 741 autoconfiguration turned on) 742 sparecol will become the 1st 743 component (component0) of this set. 744 */ 745 dumpto = sparecol; 746 break; 747 } else if (scol != -1) { 748 /* 749 Must be a spared slave. We'll dump 750 to that if we havn't found anything 751 else so far. 752 */ 753 if (dumpto == -1) 754 dumpto = sparecol; 755 } 756 } 757 } 758 759 if (dumpto == -1) { 760 /* we couldn't find any live components to dump to!?!? 
761 */ 762 error = EINVAL; 763 goto out; 764 } 765 766 bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev); 767 if (bdev == NULL) { 768 error = ENXIO; 769 goto out; 770 } 771 772 error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev, 773 blkno, va, nblk * raidPtr->bytesPerSector); 774 775 out: 776 raidunlock(rs); 777 778 return error; 779 } 780 781 /* ARGSUSED */ 782 static int 783 raidopen(dev_t dev, int flags, int fmt, 784 struct lwp *l) 785 { 786 int unit = raidunit(dev); 787 struct raid_softc *rs; 788 struct dk_softc *dksc; 789 int error = 0; 790 int part, pmask; 791 792 if ((rs = raidget(unit, true)) == NULL) 793 return ENXIO; 794 if ((error = raidlock(rs)) != 0) 795 return (error); 796 797 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) { 798 error = EBUSY; 799 goto bad; 800 } 801 802 dksc = &rs->sc_dksc; 803 804 part = DISKPART(dev); 805 pmask = (1 << part); 806 807 if (!DK_BUSY(dksc, pmask) && 808 ((rs->sc_flags & RAIDF_INITED) != 0)) { 809 /* First one... mark things as dirty... Note that we *MUST* 810 have done a configure before this. I DO NOT WANT TO BE 811 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED 812 THAT THEY BELONG TOGETHER!!!!! */ 813 /* XXX should check to see if we're only open for reading 814 here... If so, we needn't do this, but then need some 815 other way of keeping track of what's happened.. */ 816 817 rf_markalldirty(&rs->sc_r); 818 } 819 820 if ((rs->sc_flags & RAIDF_INITED) != 0) 821 error = dk_open(dksc, dev, flags, fmt, l); 822 823 bad: 824 raidunlock(rs); 825 826 return (error); 827 828 829 } 830 831 static int 832 raid_lastclose(device_t self) 833 { 834 struct raid_softc *rs = raidsoftc(self); 835 836 /* Last one... device is not unconfigured yet. 837 Device shutdown has taken care of setting the 838 clean bits if RAIDF_INITED is not set 839 mark things as clean... 
*/ 840 841 rf_update_component_labels(&rs->sc_r, 842 RF_FINAL_COMPONENT_UPDATE); 843 844 /* pass to unlocked code */ 845 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) 846 rs->sc_flags |= RAIDF_DETACH; 847 848 return 0; 849 } 850 851 /* ARGSUSED */ 852 static int 853 raidclose(dev_t dev, int flags, int fmt, struct lwp *l) 854 { 855 int unit = raidunit(dev); 856 struct raid_softc *rs; 857 struct dk_softc *dksc; 858 cfdata_t cf; 859 int error = 0, do_detach = 0, do_put = 0; 860 861 if ((rs = raidget(unit, false)) == NULL) 862 return ENXIO; 863 dksc = &rs->sc_dksc; 864 865 if ((error = raidlock(rs)) != 0) 866 return (error); 867 868 if ((rs->sc_flags & RAIDF_INITED) != 0) { 869 error = dk_close(dksc, dev, flags, fmt, l); 870 if ((rs->sc_flags & RAIDF_DETACH) != 0) 871 do_detach = 1; 872 } else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) 873 do_put = 1; 874 875 raidunlock(rs); 876 877 if (do_detach) { 878 /* free the pseudo device attach bits */ 879 cf = device_cfdata(dksc->sc_dev); 880 error = config_detach(dksc->sc_dev, 0); 881 if (error == 0) 882 free(cf, M_RAIDFRAME); 883 } else if (do_put) { 884 raidput(rs); 885 } 886 887 return (error); 888 889 } 890 891 static void 892 raid_wakeup(RF_Raid_t *raidPtr) 893 { 894 rf_lock_mutex2(raidPtr->iodone_lock); 895 rf_signal_cond2(raidPtr->iodone_cv); 896 rf_unlock_mutex2(raidPtr->iodone_lock); 897 } 898 899 static void 900 raidstrategy(struct buf *bp) 901 { 902 unsigned int unit; 903 struct raid_softc *rs; 904 struct dk_softc *dksc; 905 RF_Raid_t *raidPtr; 906 907 unit = raidunit(bp->b_dev); 908 if ((rs = raidget(unit, false)) == NULL) { 909 bp->b_error = ENXIO; 910 goto fail; 911 } 912 if ((rs->sc_flags & RAIDF_INITED) == 0) { 913 bp->b_error = ENXIO; 914 goto fail; 915 } 916 dksc = &rs->sc_dksc; 917 raidPtr = &rs->sc_r; 918 919 /* Queue IO only */ 920 if (dk_strategy_defer(dksc, bp)) 921 goto done; 922 923 /* schedule the IO to happen at the next convenient time */ 924 raid_wakeup(raidPtr); 925 926 done: 927 return; 928 929 fail: 
930 bp->b_resid = bp->b_bcount; 931 biodone(bp); 932 } 933 934 static int 935 raid_diskstart(device_t dev, struct buf *bp) 936 { 937 struct raid_softc *rs = raidsoftc(dev); 938 RF_Raid_t *raidPtr; 939 940 raidPtr = &rs->sc_r; 941 if (!raidPtr->valid) { 942 db1_printf(("raid is not valid..\n")); 943 return ENODEV; 944 } 945 946 /* XXX */ 947 bp->b_resid = 0; 948 949 return raiddoaccess(raidPtr, bp); 950 } 951 952 void 953 raiddone(RF_Raid_t *raidPtr, struct buf *bp) 954 { 955 struct raid_softc *rs; 956 struct dk_softc *dksc; 957 958 rs = raidPtr->softc; 959 dksc = &rs->sc_dksc; 960 961 dk_done(dksc, bp); 962 963 rf_lock_mutex2(raidPtr->mutex); 964 raidPtr->openings++; 965 rf_unlock_mutex2(raidPtr->mutex); 966 967 /* schedule more IO */ 968 raid_wakeup(raidPtr); 969 } 970 971 /* ARGSUSED */ 972 static int 973 raidread(dev_t dev, struct uio *uio, int flags) 974 { 975 int unit = raidunit(dev); 976 struct raid_softc *rs; 977 978 if ((rs = raidget(unit, false)) == NULL) 979 return ENXIO; 980 981 if ((rs->sc_flags & RAIDF_INITED) == 0) 982 return (ENXIO); 983 984 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio)); 985 986 } 987 988 /* ARGSUSED */ 989 static int 990 raidwrite(dev_t dev, struct uio *uio, int flags) 991 { 992 int unit = raidunit(dev); 993 struct raid_softc *rs; 994 995 if ((rs = raidget(unit, false)) == NULL) 996 return ENXIO; 997 998 if ((rs->sc_flags & RAIDF_INITED) == 0) 999 return (ENXIO); 1000 1001 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio)); 1002 1003 } 1004 1005 static int 1006 raid_detach_unlocked(struct raid_softc *rs) 1007 { 1008 struct dk_softc *dksc = &rs->sc_dksc; 1009 RF_Raid_t *raidPtr; 1010 int error; 1011 1012 raidPtr = &rs->sc_r; 1013 1014 if (DK_BUSY(dksc, 0) || 1015 raidPtr->recon_in_progress != 0 || 1016 raidPtr->parity_rewrite_in_progress != 0 || 1017 raidPtr->copyback_in_progress != 0) 1018 return EBUSY; 1019 1020 if ((rs->sc_flags & RAIDF_INITED) == 0) 1021 return 0; 1022 1023 rs->sc_flags &= 
~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}

/*
 * raidioctl: ioctl entry point for raid(4) devices.
 *
 * Dispatches the RAIDFRAME_* command set (set configuration/shutdown,
 * component label management, hot-spare handling, failure and
 * reconstruction control, status queries) and falls through to
 * dk_ioctl() for generic disk ioctls (plus DIOCGCACHE/DIOCCACHESYNC
 * handled locally).  The first switch below only checks that commands
 * which operate on a configured set see RAIDF_INITED; the second
 * switch does the actual work.
 *
 * Returns 0 on success or an errno value.
 */
static int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int     unit = raidunit(dev);
	int     error = 0;
	int     part, pmask;
	struct raid_softc *rs;
	struct dk_softc *dksc;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, *ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
/*	int raidid; */
	struct rf_recon_req *rr;
	struct rf_recon_req_internal *rrint;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t component;
	int d;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
	    (int) DISKPART(dev), (int) unit, cmd));

	/* Must be initialized for these... */
	switch (cmd) {
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
#ifdef COMPAT_50
	case RAIDFRAME_GET_INFO50:
#endif
#ifdef COMPAT_80
	case RAIDFRAME_CHECK_RECON_STATUS_EXT80:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT80:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT80:
	case RAIDFRAME_GET_INFO80:
	case RAIDFRAME_GET_COMPONENT_LABEL80:
#endif
#ifdef COMPAT_NETBSD32
#ifdef _LP64
	case RAIDFRAME_GET_INFO32:
#endif
#endif
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
#ifdef COMPAT_50
	case RAIDFRAME_GET_INFO50:
		return rf_get_info50(raidPtr, data);

	case RAIDFRAME_CONFIGURE50:
		/* compat path: translates, then joins the common path at
		 * "config:" with k_cfg already allocated and filled in. */
		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
			return retcode;
		goto config;
#endif

#ifdef COMPAT_80
	case RAIDFRAME_CHECK_RECON_STATUS_EXT80:
		return rf_check_recon_status_ext80(raidPtr, data);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT80:
		return rf_check_parityrewrite_status_ext80(raidPtr, data);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT80:
		return rf_check_copyback_status_ext80(raidPtr, data);

	case RAIDFRAME_GET_INFO80:
		return rf_get_info80(raidPtr, data);

	case RAIDFRAME_GET_COMPONENT_LABEL80:
		return rf_get_component_label80(raidPtr, data);

	case RAIDFRAME_CONFIGURE80:
		/* compat path: as above, joins "config:" on success. */
		if ((retcode = rf_config80(raidPtr, unit, data, &k_cfg)) != 0)
			return retcode;
		goto config;
#endif

		/* configure the system */
	case RAIDFRAME_CONFIGURE:
#ifdef COMPAT_NETBSD32
#ifdef _LP64
	case RAIDFRAME_CONFIGURE32:
#endif
#endif

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n",unit);
			return(EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
#ifdef COMPAT_NETBSD32
#ifdef _LP64
		/* 32-bit process on a 64-bit kernel: translate layout. */
		if (cmd == RAIDFRAME_CONFIGURE32 &&
		    (l->l_proc->p_flag & PK_32) != 0)
			retcode = rf_config_netbsd32(data, k_cfg);
		else
#endif
#endif
		{
			u_cfg = *((RF_Config_t **) data);
			retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
		}
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
				retcode));
			goto no_config;
		}
		goto config;
	config:
		rs->sc_flags &= ~RAIDF_SHUTDOWN;

		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = EINVAL;
				goto no_config;
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				retcode = ENOMEM;
				goto no_config;
			}
			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
					k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
					retcode));
				goto no_config;
			}
		} else
			specific_buf = NULL;
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 * there is no stale data left in the case of a
		 * reconfiguration
		 */
		memset(raidPtr, 0, sizeof(*raidPtr));
		raidPtr->softc = rs;
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(rs);
			raid_wakeup(raidPtr);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

	no_config:
		/*
		 * If configuration failed, set sc_flags so that we
		 * will detach the device when we close it.
		 */
		if (retcode != 0)
			rs->sc_flags |= RAIDF_SHUTDOWN;
		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((error = raidlock(rs)) != 0)
			return (error);

		/* refuse if any partition is open or a background
		 * recon/rewrite/copyback is still running */
		if (DK_BUSY(dksc, pmask) ||
		    raidPtr->recon_in_progress != 0 ||
		    raidPtr->parity_rewrite_in_progress != 0 ||
		    raidPtr->copyback_in_progress != 0)
			retcode = EBUSY;
		else {
			/* detach and free on close */
			rs->sc_flags |= RAIDF_SHUTDOWN;
			retcode = 0;
		}

		raidunlock(rs);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		return rf_get_component_label(raidPtr, data);

#if 0
	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		raidid = raidPtr->raidid;
#ifdef DEBUG
		printf("raid%d: Got component label:\n", raidid);
		printf("raid%d: Version: %d\n", raidid, clabel->version);
		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
		printf("raid%d: Column: %d\n", raidid, clabel->column);
		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
		printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif
		clabel->row = 0;
		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
		memcpy(raidget_component_label(raidPtr, column),
		    clabel, sizeof(*clabel));
		raidflush_component_label(raidPtr, column);
		return (0);
#endif

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		   */

		raidPtr->serial_number = clabel->serial_number;

		for(column=0;column<raidPtr->numCol;column++) {
			diskPtr = &raidPtr->Disks[column];
			if (!RF_DEAD_DISK(diskPtr->status)) {
				ci_label = raidget_component_label(raidPtr,
				    column);
				/* Zeroing this is important. */
				memset(ci_label, 0, sizeof(*ci_label));
				raid_init_component_label(raidPtr, ci_label);
				ci_label->serial_number =
				    raidPtr->serial_number;
				ci_label->row = 0; /* we dont' pretend to support more */
				rf_component_label_set_partitionsize(ci_label,
				    diskPtr->partitionSize);
				ci_label->column = column;
				raidflush_component_label(raidPtr, column);
			}
			/* XXXjld what about the spares? */
		}

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return(0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
					   rf_RewriteParityThread,
					   raidPtr,"raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		/* NOTE(review): not implemented -- returns retcode (still 0)
		 * without removing anything. */
		return(retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return(EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		component.row = 0; /* we don't support any more */
		column = component.column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		rf_lock_mutex2(raidPtr->mutex);
		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant!!! */
			/* some component other than this has failed.
			   Let's not make things worse than they already
			   are... */
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:     Col: %d   Too many failures.\n",
			       raidPtr->raidid, column);
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status ==
		    rf_ds_reconstructing) {
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);

			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status == rf_ds_spared) {
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
		if (rrint == NULL)
			return(ENOMEM);

		rrint->col = column;
		rrint->raidPtr = raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconstructInPlaceThread,
					   rrint, "raid_reconip");
		return(retcode);

	case RAIDFRAME_GET_INFO:
#ifdef COMPAT_NETBSD32
#ifdef _LP64
	case RAIDFRAME_GET_INFO32:
#endif
#endif
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
			  (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		retcode = rf_get_info(raidPtr, d_cfg);
		if (retcode == 0) {
#ifdef COMPAT_NETBSD32
#ifdef _LP64
			if (cmd == RAIDFRAME_GET_INFO32)
				ucfgp = NETBSD32PTR64(*(netbsd32_pointer_t *)data);
			else
#endif
#endif
				ucfgp = *(RF_DeviceConfig_t **)data;
			retcode = copyout(d_cfg, ucfgp, sizeof(RF_DeviceConfig_t));
		}
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map,
		    (struct rf_pmstat *)data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (0 != rf_paritymap_set_params(raidPtr->parity_map,
			(struct rf_pmparams *)data, 1))
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return (0);

		/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK:
#ifdef COMPAT_80
	case RAIDFRAME_FAIL_DISK80:
#endif

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		rr = (struct rf_recon_req *) data;
		if (rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);

		rf_lock_mutex2(raidPtr->mutex);
		if (raidPtr->status == rf_rs_reconstructing) {
			/* you can't fail a disk while we're reconstructing! */
			/* XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if ((raidPtr->Disks[rr->col].status ==
		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
			/* some other component has failed.  Let's not make
			   things worse. XXX wrong for RAID6 */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
			/* Can't fail a spared disk! */
			rf_unlock_mutex2(raidPtr->mutex);
			return (EINVAL);
		}
		rf_unlock_mutex2(raidPtr->mutex);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
		if (rrint == NULL)
			return(ENOMEM);
		rrint->col = rr->col;
		rrint->flags = rr->flags;
		rrint->raidPtr = raidPtr;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconThread,
					   rrint, "raid_recon");
		/* NOTE(review): the RF_CREATE_THREAD status in retcode is
		 * discarded here; other thread-creating cases return it. */
		return (0);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
					   rf_CopybackThread,
					   raidPtr,"raid_copyback");
		return (retcode);

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->status != rf_rs_reconstructing)
			*(int *) data = 100;
		else {
			if (raidPtr->reconControl->numRUsTotal > 0) {
				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
			} else {
				*(int *) data = 0;
			}
		}
		return (0);
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		rf_check_recon_status_ext(raidPtr, data);
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
				raidPtr->parity_rewrite_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		rf_check_parityrewrite_status_ext(raidPtr, data);
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		rf_check_copyback_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_SET_LAST_UNIT:
		/* only allowed when every component is healthy */
		for (column = 0; column < raidPtr->numCol; column++)
			if (raidPtr->Disks[column].status != rf_ds_optimal)
				return EBUSY;

		for (column = 0; column < raidPtr->numCol; column++) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->last_unit = *(int *)data;
			raidflush_component_label(raidPtr, column);
		}
		rs->sc_cflags |= RAIDF_UNIT_CHANGED;
		return 0;

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the dameon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		/* NOTE(review): rf_broadcast_conf2 looks like a typo for
		 * rf_broadcast_cond2 (cf. SEND_SPARET below); harmless only
		 * while this block stays #if 0'd out. */
		rf_broadcast_conf2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_resp_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	switch (cmd) {
	case DIOCGCACHE:
		retcode = rf_get_component_caches(raidPtr, (int *)data);
		break;

	case DIOCCACHESYNC:
		retcode = rf_sync_component_caches(raidPtr);
		break;

	default:
		retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
		break;
	}

	return (retcode);

}


/* raidinit -- complete the rest of the initialization for the
   RAIDframe device.  Attaches the pseudo-device, hooks the unit into
   the dk(9)/disk(9) subsystems, and marks it RAIDF_INITED. */


static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		return;
	}
	/* on success cf is deliberately not freed here -- presumably owned
	 * by the attached pseudo-device; verify against detach path. */

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usable */
	rs->sc_flags |= RAIDF_INITED;

	dkwedge_discover(&dksc->sc_dkdev);
}

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device were requesting a spare table
 * XXX
 *
 * XXX This code is not currently used.
GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int     retcode;

	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): comment above looks stale -- the code now blocks in
	 * rf_wait_cond2(), which releases the mutex while waiting. */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
#endif

/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* drop the mutex across the label update, then retake it to
		 * decrement the counter */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
		    RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		printf("raid%d: raidstart not ready\n", raidPtr->raidid);
		return;
	}

	/* kick the dk(9) layer to push queued bufs through raiddoaccess() */
	dk_start(dksc, NULL);
}

/*
 * raiddoaccess: validate one buf and hand it to RAIDframe.
 *
 * Returns EAGAIN when the set has no free openings (caller retries),
 * ENOSPC for out-of-range or non-sector-multiple requests, otherwise
 * the result of rf_DoAccess() issued as non-blocking async I/O.
 */
static int
raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	daddr_t blocknum;
	int     do_async;
	int rc;

	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->openings == 0) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EAGAIN;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	blocknum = bp->b_rawblkno;

	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
		    (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to...
	 * but DO NOT TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	/* pb accounts for a trailing partial sector in the range check */
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	/* NOTE(review): "1 ||" makes this unconditionally true -- the
	 * rf_debugKernelAccess gate is effectively disabled here. */
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			    (int) raid_addr, (int) sum, (int) num_blocks,
			    (int) pb, (int) bp->b_resid));
	}
	/* the (sum < x) comparisons also catch arithmetic wrap-around */
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		rc = ENOSPC;
		goto done;
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	if (bp->b_bcount & raidPtr->sectorMask) {
		rc = ENOSPC;
		goto done;
	}
	db1_printf(("Calling DoAccess..\n"));


	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings--;
	rf_unlock_mutex2(raidPtr->mutex);

	/*
	 * Everything is async.
	 */
	do_async = 1;

	/* don't ever condition on bp->b_flags & B_WRITE.
	 * always condition on B_READ instead */

	rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
			 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
			 do_async, raid_addr, num_blocks,
			 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

done:
	return rc;
}

/* invoke an I/O from kernel mode.
   Disk queue should be locked upon entry */

int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* complete the NOP immediately via the normal iodone path */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* set up bp so KernelWakeupFunc() fires when the component
		 * I/O completes */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.  Installed as bp->b_iodone by InitBP(); runs when the
   component I/O finishes, records any error, possibly fails the
   component, and queues req on the raid set's iodone list.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}


/*
 * initialize a buf structure for doing an I/O in the kernel.
 * cbFunc/cbArg become bp->b_iodone/bp->b_private; b_blkno is converted
 * from raid sectors to DEV_BSIZE units via the shift pair below.
 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
       RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
       void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
       struct proc *b_proc)
{
	/* bp->b_flags       = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	/* sectors -> bytes -> DEV_BSIZE blocks */
	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}

/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 * (Hmm...
where have we seen this warning before :-> GO ) 2175 */ 2176 static int 2177 raidlock(struct raid_softc *rs) 2178 { 2179 int error; 2180 2181 error = 0; 2182 mutex_enter(&rs->sc_mutex); 2183 while ((rs->sc_flags & RAIDF_LOCKED) != 0) { 2184 rs->sc_flags |= RAIDF_WANTED; 2185 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex); 2186 if (error != 0) 2187 goto done; 2188 } 2189 rs->sc_flags |= RAIDF_LOCKED; 2190 done: 2191 mutex_exit(&rs->sc_mutex); 2192 return (error); 2193 } 2194 /* 2195 * Unlock and wake up any waiters. 2196 */ 2197 static void 2198 raidunlock(struct raid_softc *rs) 2199 { 2200 2201 mutex_enter(&rs->sc_mutex); 2202 rs->sc_flags &= ~RAIDF_LOCKED; 2203 if ((rs->sc_flags & RAIDF_WANTED) != 0) { 2204 rs->sc_flags &= ~RAIDF_WANTED; 2205 cv_broadcast(&rs->sc_cv); 2206 } 2207 mutex_exit(&rs->sc_mutex); 2208 } 2209 2210 2211 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ 2212 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ 2213 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE 2214 2215 static daddr_t 2216 rf_component_info_offset(void) 2217 { 2218 2219 return RF_COMPONENT_INFO_OFFSET; 2220 } 2221 2222 static daddr_t 2223 rf_component_info_size(unsigned secsize) 2224 { 2225 daddr_t info_size; 2226 2227 KASSERT(secsize); 2228 if (secsize > RF_COMPONENT_INFO_SIZE) 2229 info_size = secsize; 2230 else 2231 info_size = RF_COMPONENT_INFO_SIZE; 2232 2233 return info_size; 2234 } 2235 2236 static daddr_t 2237 rf_parity_map_offset(RF_Raid_t *raidPtr) 2238 { 2239 daddr_t map_offset; 2240 2241 KASSERT(raidPtr->bytesPerSector); 2242 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE) 2243 map_offset = raidPtr->bytesPerSector; 2244 else 2245 map_offset = RF_COMPONENT_INFO_SIZE; 2246 map_offset += rf_component_info_offset(); 2247 2248 return map_offset; 2249 } 2250 2251 static daddr_t 2252 rf_parity_map_size(RF_Raid_t *raidPtr) 2253 { 2254 daddr_t map_size; 2255 2256 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE) 2257 map_size = raidPtr->bytesPerSector; 2258 else 2259 
map_size = RF_PARITY_MAP_SIZE; 2260 2261 return map_size; 2262 } 2263 2264 int 2265 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col) 2266 { 2267 RF_ComponentLabel_t *clabel; 2268 2269 clabel = raidget_component_label(raidPtr, col); 2270 clabel->clean = RF_RAID_CLEAN; 2271 raidflush_component_label(raidPtr, col); 2272 return(0); 2273 } 2274 2275 2276 int 2277 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col) 2278 { 2279 RF_ComponentLabel_t *clabel; 2280 2281 clabel = raidget_component_label(raidPtr, col); 2282 clabel->clean = RF_RAID_DIRTY; 2283 raidflush_component_label(raidPtr, col); 2284 return(0); 2285 } 2286 2287 int 2288 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2289 { 2290 KASSERT(raidPtr->bytesPerSector); 2291 return raidread_component_label(raidPtr->bytesPerSector, 2292 raidPtr->Disks[col].dev, 2293 raidPtr->raid_cinfo[col].ci_vp, 2294 &raidPtr->raid_cinfo[col].ci_label); 2295 } 2296 2297 RF_ComponentLabel_t * 2298 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2299 { 2300 return &raidPtr->raid_cinfo[col].ci_label; 2301 } 2302 2303 int 2304 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2305 { 2306 RF_ComponentLabel_t *label; 2307 2308 label = &raidPtr->raid_cinfo[col].ci_label; 2309 label->mod_counter = raidPtr->mod_counter; 2310 #ifndef RF_NO_PARITY_MAP 2311 label->parity_map_modcount = label->mod_counter; 2312 #endif 2313 return raidwrite_component_label(raidPtr->bytesPerSector, 2314 raidPtr->Disks[col].dev, 2315 raidPtr->raid_cinfo[col].ci_vp, label); 2316 } 2317 2318 2319 static int 2320 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp, 2321 RF_ComponentLabel_t *clabel) 2322 { 2323 return raidread_component_area(dev, b_vp, clabel, 2324 sizeof(RF_ComponentLabel_t), 2325 rf_component_info_offset(), 2326 rf_component_info_size(secsize)); 2327 } 2328 2329 /* ARGSUSED */ 2330 static int 2331 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data, 2332 size_t msize, 
    daddr_t offset, daddr_t dsize)
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_READ;
 	bp->b_resid = dsize;

	/* Synchronous read of the reserved area, bypassing the
	   filesystem: queue directly to the block device. */
	bdev_strategy(bp);
	error = biowait(bp);

	if (!error) {
		/* Only the first msize bytes of the (possibly padded)
		   dsize-byte area are meaningful to the caller. */
		memcpy(data, bp->b_data, msize);
	}

	brelse(bp, 0);
	return(error);
}


/*
 * Write a component label into the component info area of `dev'
 * (always synchronously).
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}

/*
 * Write `msize' bytes from `data' into the reserved area at byte
 * `offset' of `dev', zero-padding up to `dsize'.  If `asyncp' is set
 * the write is fired off B_ASYNC and the result is not collected.
 */
/* ARGSUSED */
static int
raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
	bp->b_resid = dsize;

	/* Zero the whole area first so the pad beyond msize is
	   deterministic on disk. */
	memset(bp->b_data, 0, dsize);
	memcpy(bp->b_data, data, msize);

	bdev_strategy(bp);
	if (asyncp)
		/* Async: biodone will release the buffer; we report
		   unconditional success to the caller. */
		return 0;
	error = biowait(bp);
	brelse(bp, 0);
	if (error) {
#if 1
		printf("Failed to write RAID component info!\n");
#endif
	}

	return(error);
}

/*
 * Write the in-core parity map `map' to the parity map area of every
 * live component.  Errors are currently ignored (see XXXjld below).
 */
void
rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	int c;

	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		/* XXXjld: what if an error occurs here? */
		raidwrite_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, map,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr), 0);
	}
}

/*
 * Read the parity map from every live component and merge them into
 * `map': the first live copy seeds the result, subsequent copies are
 * folded in with rf_paritymap_merge().
 */
void
rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	struct rf_paritymap_ondisk tmp;
	int c,first;

	first=1;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		raidread_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr));
		if (first) {
			memcpy(map, &tmp, sizeof(*map));
			first = 0;
		} else {
			rf_paritymap_merge(map, &tmp);
		}
	}
}

/*
 * Bump the set's modification counter and mark every live component
 * (and every in-use spare) dirty on disk.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which column this spare stands in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}


/*
 * Refresh the component labels of all optimal components and in-use
 * spares.  When `final' is RF_FINAL_COMPONENT_UPDATE and the parity
 * is known good, the clean bit is set as well.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which column this spare replaced. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}

/*
 * Close a component vnode.  Auto-configured components were opened
 * with VOP_OPEN at autoconfig time and are closed/released directly;
 * manually configured ones came from vn_open and go back through
 * vn_close with the current lwp's credentials.
 */
void
rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
{

	if (vp != NULL) {
		if (auto_configured == 1) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

		} else {
			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
		}
	}
}


/*
 * Close and forget the vnodes of all components and spares of this
 * set, clearing the auto_configured markers as we go.
 */
void
rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
{
	int r,c;
	struct vnode *vp;
	int acd;


	/* We take this opportunity to close the vnodes like we should..
	 */

	for (c = 0; c < raidPtr->numCol; c++) {
		vp = raidPtr->raid_cinfo[c].ci_vp;
		acd = raidPtr->Disks[c].auto_configured;
		rf_close_component(raidPtr, vp, acd);
		raidPtr->raid_cinfo[c].ci_vp = NULL;
		raidPtr->Disks[c].auto_configured = 0;
	}

	/* Spares live after the data columns in the Disks/raid_cinfo
	   arrays; close them the same way. */
	for (r = 0; r < raidPtr->numSpare; r++) {
		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
		rf_close_component(raidPtr, vp, acd);
		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
	}
}


/*
 * Kernel-thread body: fail component req->col and (optionally)
 * reconstruct onto a spare.  Frees `req' and exits the thread.
 */
void
rf_ReconThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}

/*
 * Kernel-thread body: rewrite all parity, then record whether the
 * array is now clean.  Wakes any shutdown waiter and exits.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}


/*
 * Kernel-thread body: copy reconstructed data back to a replaced
 * component, then exit.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}


/*
 * Kernel-thread body: reconstruct component req->col in place
 * (i.e. onto the same column).  Frees `req' and exits.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}

/*
 * Try to read a component label from device `dev'/`vp'.  If the label
 * is plausible, prepend a new RF_AutoConfig_t for it to `ac_list' and
 * return the new list head; otherwise close/release the vnode and
 * return `ac_list' unchanged.  On out-of-memory the whole list is
 * torn down and NULL is returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		    while(ac_list) {
			    ac = ac_list;
			    if (ac->clabel)
				    free(ac->clabel, M_RAIDFRAME);
			    ac_list = ac_list->next;
			    free(ac, M_RAIDFRAME);
		    }
		    printf("RAID auto config: out of memory!\n");
		    return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
				cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it.
			 */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}

/*
 * Walk every disk-class device in the system looking for RAIDframe
 * component labels, and return a list of RF_AutoConfig_t candidates
 * for autoconfiguration.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		    dv = deviter_next(&di)) {

			/* we are only interested in disks... */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies... */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CD's... */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md's... */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /*No raid partition as yet*/

			/* get a vnode for the raw partition of this disk */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares."  Continue looking
				   for something that exists*/
				vput(vp);
				continue;
			}

			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				/* A wedge is a candidate only if its
				   partition type says RAIDframe. */
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				/* rf_get_component takes ownership of vp. */
				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /*There is a raid component on this disk*/
				continue;
			}

			/* Ok, the disk exists.  Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more.  We'll allocate it again
			   a little later if we really do...
			 */
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			rf_part_found = 0; /*No raid partitions yet*/
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
			}

			/*
			 *If there is no raid component on this disk, either in a
			 *disklabel or inside a wedge, check the raw partition as well,
			 *as it is possible to configure raid components on raw disk
			 *devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever...
					 */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
				    label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}


/*
 * Sanity-check a component label read from disk.  Returns 1 if all
 * fields look plausible (after scrubbing stale *Hi fields via
 * rf_fix_old_label_size() when `numsecs' is known), 0 otherwise.
 */
int
rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
{

	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
	    ((clabel->clean == RF_RAID_CLEAN) ||
	     (clabel->clean == RF_RAID_DIRTY)) &&
	    clabel->row >=0 &&
	    clabel->column >= 0 &&
	    clabel->num_rows > 0 &&
	    clabel->num_columns > 0 &&
	    clabel->row < clabel->num_rows &&
	    clabel->column < clabel->num_columns &&
	    clabel->blockSize > 0 &&
	    /*
	     * numBlocksHi may contain garbage, but it is ok since
	     * the type is unsigned.  If it is really garbage,
	     * rf_fix_old_label_size() will fix it.
	     */
	    rf_component_label_numblocks(clabel) > 0) {
		/*
		 * label looks reasonable enough...
		 * let's make sure it has no old garbage.
		 */
		if (numsecs)
			rf_fix_old_label_size(clabel, numsecs);
		return(1);
	}
	return(0);
}


/*
 * For reasons yet unknown, some old component labels have garbage in
 * the newer numBlocksHi region, and this causes lossage.  Since those
 * disks will also have numsecs set to less than 32 bits of sectors,
 * we can determine when this corruption has occurred, and fix it.
 *
 * The exact same problem, with the same unknown reason, happens to
 * the partitionSizeHi member as well.
 */
static void
rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
{

	/* If the whole device has < 2^32 sectors, the high words
	   cannot legitimately be non-zero: clear them. */
	if (numsecs < ((uint64_t)1 << 32)) {
		if (clabel->numBlocksHi) {
			printf("WARNING: total sectors < 32 bits, yet "
			       "numBlocksHi set\n"
			       "WARNING: resetting numBlocksHi to zero.\n");
			clabel->numBlocksHi = 0;
		}

		if (clabel->partitionSizeHi) {
			printf("WARNING: total sectors < 32 bits, yet "
			       "partitionSizeHi set\n"
			       "WARNING: resetting partitionSizeHi to zero.\n");
			clabel->partitionSizeHi = 0;
		}
	}
}


#ifdef DEBUG
/*
 * Dump a component label to the console (debug kernels only).
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* Human-readable names for root_partition values 0-3. */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif

/*
 * Partition the autoconfig candidates in `ac_list' into config sets:
 * components whose labels match (per rf_does_it_fit()) end up on the
 * same RF_ConfigSet_t.  The input list links are re-threaded onto the
 * per-set lists.
 */
RF_ConfigSet_t *
rf_create_auto_sets(RF_AutoConfig_t *ac_list)
{
	RF_AutoConfig_t *ac;
	RF_ConfigSet_t *config_sets;
	RF_ConfigSet_t *cset;
	RF_AutoConfig_t *ac_next;


	config_sets = NULL;

	/* Go through the AutoConfig list, and figure out which components
	   belong to what sets.  */
	ac = ac_list;
	while(ac!=NULL) {
		/* we're going to putz with ac->next, so save it here
		   for use at the end of the loop */
		ac_next = ac->next;

		if (config_sets == NULL) {
			/* will need at least this one... */
			config_sets = (RF_ConfigSet_t *)
				malloc(sizeof(RF_ConfigSet_t),
				       M_RAIDFRAME, M_NOWAIT);
			if (config_sets == NULL) {
				panic("rf_create_auto_sets: No memory!");
			}
			/* this one is easy :) */
			config_sets->ac = ac;
			config_sets->next = NULL;
			config_sets->rootable = 0;
			ac->next = NULL;
		} else {
			/* which set does this component fit into? */
			cset = config_sets;
			while(cset!=NULL) {
				if (rf_does_it_fit(cset, ac)) {
					/* looks like it matches... */
					ac->next = cset->ac;
					cset->ac = ac;
					break;
				}
				cset = cset->next;
			}
			if (cset==NULL) {
				/* didn't find a match above...
				   new set..*/
				cset = (RF_ConfigSet_t *)
					malloc(sizeof(RF_ConfigSet_t),
					       M_RAIDFRAME, M_NOWAIT);
				if (cset == NULL) {
					panic("rf_create_auto_sets: No memory!");
				}
				cset->ac = ac;
				ac->next = NULL;
				cset->next = config_sets;
				cset->rootable = 0;
				config_sets = cset;
			}
		}
		ac = ac_next;
	}


	return(config_sets);
}

/*
 * Decide whether autoconfig candidate `ac' belongs to config set
 * `cset' by comparing its label against the set's first member.
 * Returns 1 on a match, 0 otherwise.
 */
static int
rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
{
	RF_ComponentLabel_t *clabel1, *clabel2;

	/* If this one matches the *first* one in the set, that's good
	   enough, since the other members of the set would have been
	   through here too... */
	/* note that we are not checking partitionSize here..

	   Note that we are also not checking the mod_counters here.
	   If everything else matches except the mod_counter, that's
	   good enough for this test.  We will deal with the mod_counters
	   a little later in the autoconfiguration process.

	    (clabel1->mod_counter == clabel2->mod_counter) &&

	   The reason we don't check for this is that failed disks
	   will have lower modification counts.  If those disks are
	   not added to the set they used to belong to, then they will
	   form their own set, which may result in 2 different sets,
	   for example, competing to be configured at raid0, and
	   perhaps competing to be the root filesystem set.  If the
	   wrong ones get configured, or both attempt to become /,
	   weird behaviour and or serious lossage will occur.  Thus we
	   need to bring them into the fold here, and kick them out at
	   a later point.

	*/

	clabel1 = cset->ac->clabel;
	clabel2 = ac->clabel;
	if ((clabel1->version == clabel2->version) &&
	    (clabel1->serial_number == clabel2->serial_number) &&
	    (clabel1->num_rows == clabel2->num_rows) &&
	    (clabel1->num_columns == clabel2->num_columns) &&
	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
	    (clabel1->parityConfig == clabel2->parityConfig) &&
	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
	    (clabel1->blockSize == clabel2->blockSize) &&
	    rf_component_label_numblocks(clabel1) ==
	    rf_component_label_numblocks(clabel2) &&
	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
	    (clabel1->root_partition == clabel2->root_partition) &&
	    (clabel1->last_unit == clabel2->last_unit) &&
	    (clabel1->config_order == clabel2->config_order)) {
		/* if it get's here, it almost *has* to be a match */
	} else {
		/* it's not consistent with somebody in the set..
		   punt */
		return(0);
	}
	/* all was fine.. it must fit... */
	return(1);
}

/*
 * Check whether config set `cset' still has enough live, current
 * (matching mod_counter) components to be configured for its RAID
 * level.  Returns 1 if configurable, 0 if too many are missing.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set.
	 */

	/* The authoritative mod_counter is the maximum found across
	   the set; stale (failed) components have smaller counters. */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
				/* Just did an even component, and we didn't
				   bail.. reset the even_pair_failed flag,
				   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4/5 tolerate
	   at most one. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}

/*
 * Build an RF_Config_t for autoconfiguration from the component
 * labels on the list headed by `ac'.
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
			RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ??
					 */

	/* 2. Fill in the device name for each column from its label. */
	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}

/*
 * Set the autoconfigure flag for the whole set: record it in the
 * softc and write it into the label of every optimal component and
 * in-use spare.  Returns the new value.
 */
int
rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
{
	RF_ComponentLabel_t *clabel;
	int column;
	int sparecol;

	raidPtr->autoconfigure = new_value;

	for(column=0; column<raidPtr->numCol; column++) {
		if (raidPtr->Disks[column].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->autoconfigure = new_value;
			raidflush_component_label(raidPtr, column);
		}
	}
	for(column = 0; column < raidPtr->numSpare ; column++) {
		sparecol = raidPtr->numCol + column;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			clabel = raidget_component_label(raidPtr, sparecol);
			clabel->autoconfigure = new_value;
			raidflush_component_label(raidPtr, sparecol);
		}
	}
	return(new_value);
}

/*
 * Set the root_partition flag for the whole set, mirroring it into
 * the label of every optimal component and in-use spare.  Returns the
 * new value.
 */
int
rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
{
	RF_ComponentLabel_t *clabel;
	int column;
	int sparecol;

	raidPtr->root_partition = new_value;
	for(column=0; column<raidPtr->numCol; column++) {
		if (raidPtr->Disks[column].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->root_partition = new_value;
			raidflush_component_label(raidPtr, column);
		}
	}
	for(column = 0; column < raidPtr->numSpare ; column++) {
		sparecol = raidPtr->numCol + column;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			clabel = raidget_component_label(raidPtr, sparecol);
			clabel->root_partition = new_value;
			raidflush_component_label(raidPtr, sparecol);
		}
	}
	return(new_value);
}

/*
 * Close and release every vnode still held by the autoconfig
 * candidates of `cset'.
 */
void
rf_release_all_vps(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;

	ac = cset->ac;
	while(ac!=NULL) {
		/* Close the vp, and give it back */
		if (ac->vp) {
			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
			vput(ac->vp);
			ac->vp = NULL;
		}
		ac = ac->next;
	}
}


/*
 * Free all memory associated with config set `cset': each candidate's
 * label, each candidate, and the set itself.
 */
void
rf_cleanup_config_set(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *next_ac;

	ac = cset->ac;
	while(ac!=NULL) {
		next_ac = ac->next;
		/* nuke the label */
		free(ac->clabel, M_RAIDFRAME);
		/* cleanup the config structure */
		free(ac, M_RAIDFRAME);
		/* "next.." */
		ac = next_ac;
	}
	/* and, finally, nuke the config set */
	free(cset, M_RAIDFRAME);
}


/*
 * Initialize `clabel' from the current state of `raidPtr': geometry,
 * serial/mod counters, autoconfig and root flags, etc.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!"
*/ 3455 3456 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; 3457 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU; 3458 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU; 3459 3460 clabel->blockSize = raidPtr->bytesPerSector; 3461 rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk); 3462 3463 /* XXX not portable */ 3464 clabel->parityConfig = raidPtr->Layout.map->parityConfig; 3465 clabel->maxOutstanding = raidPtr->maxOutstanding; 3466 clabel->autoconfigure = raidPtr->autoconfigure; 3467 clabel->root_partition = raidPtr->root_partition; 3468 clabel->last_unit = raidPtr->raidid; 3469 clabel->config_order = raidPtr->config_order; 3470 3471 #ifndef RF_NO_PARITY_MAP 3472 rf_paritymap_init_label(raidPtr->parity_map, clabel); 3473 #endif 3474 } 3475 3476 struct raid_softc * 3477 rf_auto_config_set(RF_ConfigSet_t *cset) 3478 { 3479 RF_Raid_t *raidPtr; 3480 RF_Config_t *config; 3481 int raidID; 3482 struct raid_softc *sc; 3483 3484 #ifdef DEBUG 3485 printf("RAID autoconfigure\n"); 3486 #endif 3487 3488 /* 1. Create a config structure */ 3489 config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO); 3490 if (config == NULL) { 3491 printf("%s: Out of mem - config!?!?\n", __func__); 3492 /* XXX do something more intelligent here. */ 3493 return NULL; 3494 } 3495 3496 /* 3497 2. Figure out what RAID ID this one is supposed to live at 3498 See if we can get the same RAID dev that it was configured 3499 on last time.. 3500 */ 3501 3502 raidID = cset->ac->clabel->last_unit; 3503 for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0; 3504 sc = raidget(++raidID, false)) 3505 continue; 3506 #ifdef DEBUG 3507 printf("Configuring raid%d:\n",raidID); 3508 #endif 3509 3510 if (sc == NULL) 3511 sc = raidget(raidID, true); 3512 if (sc == NULL) { 3513 printf("%s: Out of mem - softc!?!?\n", __func__); 3514 /* XXX do something more intelligent here. 
*/ 3515 free(config, M_RAIDFRAME); 3516 return NULL; 3517 } 3518 3519 raidPtr = &sc->sc_r; 3520 3521 /* XXX all this stuff should be done SOMEWHERE ELSE! */ 3522 raidPtr->softc = sc; 3523 raidPtr->raidid = raidID; 3524 raidPtr->openings = RAIDOUTSTANDING; 3525 3526 /* 3. Build the configuration structure */ 3527 rf_create_configuration(cset->ac, config, raidPtr); 3528 3529 /* 4. Do the configuration */ 3530 if (rf_Configure(raidPtr, config, cset->ac) == 0) { 3531 raidinit(sc); 3532 3533 rf_markalldirty(raidPtr); 3534 raidPtr->autoconfigure = 1; /* XXX do this here? */ 3535 switch (cset->ac->clabel->root_partition) { 3536 case 1: /* Force Root */ 3537 case 2: /* Soft Root: root when boot partition part of raid */ 3538 /* 3539 * everything configured just fine. Make a note 3540 * that this set is eligible to be root, 3541 * or forced to be root 3542 */ 3543 cset->rootable = cset->ac->clabel->root_partition; 3544 /* XXX do this here? */ 3545 raidPtr->root_partition = cset->rootable; 3546 break; 3547 default: 3548 break; 3549 } 3550 } else { 3551 raidput(sc); 3552 sc = NULL; 3553 } 3554 3555 /* 5. Cleanup */ 3556 free(config, M_RAIDFRAME); 3557 return sc; 3558 } 3559 3560 void 3561 rf_pool_init(struct pool *p, size_t size, const char *w_chan, 3562 size_t xmin, size_t xmax) 3563 { 3564 int error; 3565 3566 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO); 3567 pool_sethiwat(p, xmax); 3568 if ((error = pool_prime(p, xmin)) != 0) 3569 panic("%s: failed to prime pool: %d", __func__, error); 3570 pool_setlowat(p, xmin); 3571 } 3572 3573 /* 3574 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue 3575 * to see if there is IO pending and if that IO could possibly be done 3576 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1 3577 * otherwise. 
 *
 */
int
rf_buf_queue_check(RF_Raid_t *raidPtr)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;

	/* Not yet initialized: nothing can be done. */
	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 1;

	if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
		/* there is work to do */
		return 0;
	}
	/* default is nothing to do */
	return 1;
}

/*
 * Query the size and sector size of the component open on vp and fill
 * in diskPtr.  numBlocks excludes the rf_protectedSectors reserved for
 * RAIDframe metadata; partitionSize is the raw size.
 * Returns 0 on success or the error from getdisksize().
 */
int
rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
{
	uint64_t numsecs;
	unsigned secsize;
	int error;

	error = getdisksize(vp, &numsecs, &secsize);
	if (error == 0) {
		diskPtr->blockSize = secsize;
		diskPtr->numBlocks = numsecs - rf_protectedSectors;
		diskPtr->partitionSize = numsecs;
		return 0;
	}
	return error;
}

/* autoconf(9) match function: a raid pseudo-device always matches. */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}

/* autoconf(9) attach function: intentionally empty; setup happens later. */
static void
raid_attach(device_t parent, device_t self, void *aux)
{
}


/*
 * autoconf(9) detach function: tear down the set while holding the
 * softc lock, then release the softc on success.
 */
static int
raid_detach(device_t self, int flags)
{
	int error;
	struct raid_softc *rs = raidsoftc(self);

	if (rs == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	error = raid_detach_unlocked(rs);

	raidunlock(rs);

	/* XXX raid can be referenced here */

	if (error)
		return error;

	/* Free the softc */
	raidput(rs);

	return 0;
}

/*
 * Publish synthetic disk geometry for the set to the disk(9) layer.
 * The sector count and size are real; sectors-per-track and track
 * count are fabricated from the stripe layout and column count.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
}

/*
 * Get cache info for all the components (including spares).
 * Returns intersection of all the cache flags of all disks, or first
 * error if any encountered.
 * XXXfua feature flags can change as spares are added - lock down somehow
 */
static int
rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
{
	int c;
	int error;
	int dkwhole = 0, dkpart;

	for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
		/*
		 * Check any non-dead disk, even when currently being
		 * reconstructed.
		 */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
		    || raidPtr->Disks[c].status == rf_ds_reconstructing) {
			error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
			    DIOCGCACHE, &dkpart, FREAD, NOCRED);
			if (error) {
				/*
				 * ENODEV means the component simply has no
				 * cache ioctl; don't log that case.
				 */
				if (error != ENODEV) {
					printf("raid%d: get cache for component %s failed\n",
					    raidPtr->raidid,
					    raidPtr->Disks[c].devname);
				}

				return error;
			}

			if (c == 0)
				dkwhole = dkpart;
			else
				dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
		}
	}

	*data = dkwhole;

	return 0;
}

/*
 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
 * We end up returning whatever error was returned by the first cache flush
 * that fails.
3720 */ 3721 3722 int 3723 rf_sync_component_caches(RF_Raid_t *raidPtr) 3724 { 3725 int c, sparecol; 3726 int e,error; 3727 int force = 1; 3728 3729 error = 0; 3730 for (c = 0; c < raidPtr->numCol; c++) { 3731 if (raidPtr->Disks[c].status == rf_ds_optimal) { 3732 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC, 3733 &force, FWRITE, NOCRED); 3734 if (e) { 3735 if (e != ENODEV) 3736 printf("raid%d: cache flush to component %s failed.\n", 3737 raidPtr->raidid, raidPtr->Disks[c].devname); 3738 if (error == 0) { 3739 error = e; 3740 } 3741 } 3742 } 3743 } 3744 3745 for( c = 0; c < raidPtr->numSpare ; c++) { 3746 sparecol = raidPtr->numCol + c; 3747 /* Need to ensure that the reconstruct actually completed! */ 3748 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3749 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp, 3750 DIOCCACHESYNC, &force, FWRITE, NOCRED); 3751 if (e) { 3752 if (e != ENODEV) 3753 printf("raid%d: cache flush to component %s failed.\n", 3754 raidPtr->raidid, raidPtr->Disks[sparecol].devname); 3755 if (error == 0) { 3756 error = e; 3757 } 3758 } 3759 } 3760 } 3761 return error; 3762 } 3763 3764 /* Fill in info with the current status */ 3765 void 3766 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info) 3767 { 3768 3769 if (raidPtr->status != rf_rs_reconstructing) { 3770 info->total = 100; 3771 info->completed = 100; 3772 } else { 3773 info->total = raidPtr->reconControl->numRUsTotal; 3774 info->completed = raidPtr->reconControl->numRUsComplete; 3775 } 3776 info->remaining = info->total - info->completed; 3777 } 3778 3779 /* Fill in info with the current status */ 3780 void 3781 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info) 3782 { 3783 3784 if (raidPtr->parity_rewrite_in_progress == 1) { 3785 info->total = raidPtr->Layout.numStripe; 3786 info->completed = raidPtr->parity_rewrite_stripes_done; 3787 } else { 3788 info->completed = 100; 3789 info->total = 100; 3790 } 3791 
info->remaining = info->total - info->completed; 3792 } 3793 3794 /* Fill in info with the current status */ 3795 void 3796 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info) 3797 { 3798 3799 if (raidPtr->copyback_in_progress == 1) { 3800 info->total = raidPtr->Layout.numStripe; 3801 info->completed = raidPtr->copyback_stripes_done; 3802 info->remaining = info->total - info->completed; 3803 } else { 3804 info->remaining = 0; 3805 info->completed = 100; 3806 info->total = 100; 3807 } 3808 } 3809 3810 /* Fill in config with the current info */ 3811 int 3812 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config) 3813 { 3814 int d, i, j; 3815 3816 if (!raidPtr->valid) 3817 return (ENODEV); 3818 config->cols = raidPtr->numCol; 3819 config->ndevs = raidPtr->numCol; 3820 if (config->ndevs >= RF_MAX_DISKS) 3821 return (ENOMEM); 3822 config->nspares = raidPtr->numSpare; 3823 if (config->nspares >= RF_MAX_DISKS) 3824 return (ENOMEM); 3825 config->maxqdepth = raidPtr->maxQueueDepth; 3826 d = 0; 3827 for (j = 0; j < config->cols; j++) { 3828 config->devs[d] = raidPtr->Disks[j]; 3829 d++; 3830 } 3831 for (j = config->cols, i = 0; i < config->nspares; i++, j++) { 3832 config->spares[i] = raidPtr->Disks[j]; 3833 if (config->spares[i].status == rf_ds_rebuilding_spare) { 3834 /* XXX: raidctl(8) expects to see this as a used spare */ 3835 config->spares[i].status = rf_ds_used_spare; 3836 } 3837 } 3838 return 0; 3839 } 3840 3841 int 3842 rf_get_component_label(RF_Raid_t *raidPtr, void *data) 3843 { 3844 RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data; 3845 RF_ComponentLabel_t *raid_clabel; 3846 int column = clabel->column; 3847 3848 if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare)) 3849 return EINVAL; 3850 raid_clabel = raidget_component_label(raidPtr, column); 3851 memcpy(clabel, raid_clabel, sizeof *clabel); 3852 3853 return 0; 3854 } 3855 3856 /* 3857 * Module interface 3858 */ 3859 3860 MODULE(MODULE_CLASS_DRIVER, raid, 
"dk_subr,bufq_fcfs"); 3861 3862 #ifdef _MODULE 3863 CFDRIVER_DECL(raid, DV_DISK, NULL); 3864 #endif 3865 3866 static int raid_modcmd(modcmd_t, void *); 3867 static int raid_modcmd_init(void); 3868 static int raid_modcmd_fini(void); 3869 3870 static int 3871 raid_modcmd(modcmd_t cmd, void *data) 3872 { 3873 int error; 3874 3875 error = 0; 3876 switch (cmd) { 3877 case MODULE_CMD_INIT: 3878 error = raid_modcmd_init(); 3879 break; 3880 case MODULE_CMD_FINI: 3881 error = raid_modcmd_fini(); 3882 break; 3883 default: 3884 error = ENOTTY; 3885 break; 3886 } 3887 return error; 3888 } 3889 3890 static int 3891 raid_modcmd_init(void) 3892 { 3893 int error; 3894 int bmajor, cmajor; 3895 3896 mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE); 3897 mutex_enter(&raid_lock); 3898 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 3899 rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM); 3900 rf_init_cond2(rf_sparet_wait_cv, "sparetw"); 3901 rf_init_cond2(rf_sparet_resp_cv, "rfgst"); 3902 3903 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; 3904 #endif 3905 3906 bmajor = cmajor = -1; 3907 error = devsw_attach("raid", &raid_bdevsw, &bmajor, 3908 &raid_cdevsw, &cmajor); 3909 if (error != 0 && error != EEXIST) { 3910 aprint_error("%s: devsw_attach failed %d\n", __func__, error); 3911 mutex_exit(&raid_lock); 3912 return error; 3913 } 3914 #ifdef _MODULE 3915 error = config_cfdriver_attach(&raid_cd); 3916 if (error != 0) { 3917 aprint_error("%s: config_cfdriver_attach failed %d\n", 3918 __func__, error); 3919 devsw_detach(&raid_bdevsw, &raid_cdevsw); 3920 mutex_exit(&raid_lock); 3921 return error; 3922 } 3923 #endif 3924 error = config_cfattach_attach(raid_cd.cd_name, &raid_ca); 3925 if (error != 0) { 3926 aprint_error("%s: config_cfattach_attach failed %d\n", 3927 __func__, error); 3928 #ifdef _MODULE 3929 config_cfdriver_detach(&raid_cd); 3930 #endif 3931 devsw_detach(&raid_bdevsw, &raid_cdevsw); 3932 mutex_exit(&raid_lock); 3933 return error; 3934 } 3935 3936 raidautoconfigdone = false; 3937 
3938 mutex_exit(&raid_lock); 3939 3940 if (error == 0) { 3941 if (rf_BootRaidframe(true) == 0) 3942 aprint_verbose("Kernelized RAIDframe activated\n"); 3943 else 3944 panic("Serious error activating RAID!!"); 3945 } 3946 3947 /* 3948 * Register a finalizer which will be used to auto-config RAID 3949 * sets once all real hardware devices have been found. 3950 */ 3951 error = config_finalize_register(NULL, rf_autoconfig); 3952 if (error != 0) { 3953 aprint_error("WARNING: unable to register RAIDframe " 3954 "finalizer\n"); 3955 error = 0; 3956 } 3957 3958 return error; 3959 } 3960 3961 static int 3962 raid_modcmd_fini(void) 3963 { 3964 int error; 3965 3966 mutex_enter(&raid_lock); 3967 3968 /* Don't allow unload if raid device(s) exist. */ 3969 if (!LIST_EMPTY(&raids)) { 3970 mutex_exit(&raid_lock); 3971 return EBUSY; 3972 } 3973 3974 error = config_cfattach_detach(raid_cd.cd_name, &raid_ca); 3975 if (error != 0) { 3976 aprint_error("%s: cannot detach cfattach\n",__func__); 3977 mutex_exit(&raid_lock); 3978 return error; 3979 } 3980 #ifdef _MODULE 3981 error = config_cfdriver_detach(&raid_cd); 3982 if (error != 0) { 3983 aprint_error("%s: cannot detach cfdriver\n",__func__); 3984 config_cfattach_attach(raid_cd.cd_name, &raid_ca); 3985 mutex_exit(&raid_lock); 3986 return error; 3987 } 3988 #endif 3989 error = devsw_detach(&raid_bdevsw, &raid_cdevsw); 3990 if (error != 0) { 3991 aprint_error("%s: cannot detach devsw\n",__func__); 3992 #ifdef _MODULE 3993 config_cfdriver_attach(&raid_cd); 3994 #endif 3995 config_cfattach_attach(raid_cd.cd_name, &raid_ca); 3996 mutex_exit(&raid_lock); 3997 return error; 3998 } 3999 rf_BootRaidframe(false); 4000 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 4001 rf_destroy_mutex2(rf_sparet_wait_mutex); 4002 rf_destroy_cond2(rf_sparet_wait_cv); 4003 rf_destroy_cond2(rf_sparet_resp_cv); 4004 #endif 4005 mutex_exit(&raid_lock); 4006 mutex_destroy(&raid_lock); 4007 4008 return error; 4009 } 4010