1 /* $NetBSD: rf_netbsdkintf.c,v 1.410 2022/08/28 00:37:41 oster Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Greg Oster; Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 
40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 66 * 67 * @(#)cd.c 8.2 (Berkeley) 11/16/93 68 */ 69 70 /* 71 * Copyright (c) 1995 Carnegie-Mellon University. 72 * All rights reserved. 
 *
 * Authors: Mark Holland, Jim Zelenka
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/***********************************************************
 *
 * rf_kintf.c -- the kernel interface routines for RAIDframe
 *
 ***********************************************************/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.410 2022/08/28 00:37:41 oster Exp $");

#ifdef _KERNEL_OPT
#include "opt_raid_autoconfig.h"
#include "opt_compat_netbsd32.h"
#endif

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/disk.h>
#include <sys/device.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/disklabel.h>
#include <sys/conf.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/reboot.h>
#include <sys/kauth.h>
#include <sys/module.h>
#include <sys/compat_stub.h>

#include <prop/proplib.h>

#include <dev/raidframe/raidframevar.h>
#include <dev/raidframe/raidframeio.h>
#include <dev/raidframe/rf_paritymap.h>

#include "rf_raid.h"
#include "rf_copyback.h"
#include "rf_dag.h"
#include "rf_dagflags.h"
#include "rf_desc.h"
#include "rf_diskqueue.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_kintf.h"
#include "rf_options.h"
#include "rf_driver.h"
#include "rf_parityscan.h"
#include "rf_threadstuff.h"

#include "ioconf.h"

#ifdef DEBUG
int rf_kdebug_level = 0;
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#else				/* DEBUG */
#define db1_printf(a) { }
#endif				/* DEBUG */

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* Synchronization for the spare-table installation handshake below. */
static rf_declare_mutex2(rf_sparet_wait_mutex);
static rf_declare_cond2(rf_sparet_wait_cv);
static rf_declare_cond2(rf_sparet_resp_cv);

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */
#endif

const int rf_b_pass = (B_PHYS|B_RAW|B_MEDIA_FLAGS);

MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");

/* prototypes */
static void KernelWakeupFunc(struct buf *);
static void InitBP(struct buf *, struct vnode *, unsigned,
    dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    void *, int);
static void raidinit(struct raid_softc *);
static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);

static int raid_match(device_t, cfdata_t, void *);
static void raid_attach(device_t, device_t, void *);
static int raid_detach(device_t, int);

static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t);
static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t, int);

static int raidwrite_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);
static int raidread_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);

static int raid_diskstart(device_t, struct buf *bp);
static int raid_dumpblocks(device_t, void *, daddr_t, int);
static int raid_lastclose(device_t);

static dev_type_open(raidopen);
static dev_type_close(raidclose);
static dev_type_read(raidread);
static dev_type_write(raidwrite);
static dev_type_ioctl(raidioctl);
static dev_type_strategy(raidstrategy);
static dev_type_dump(raiddump);
static dev_type_size(raidsize);

/* Block-device switch entry points for /dev/raidN block nodes. */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

/* Character-device switch entry points for the raw /dev/rraidN nodes. */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

/* Hooks handed to the common disk (dksc) framework. */
static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,
	.d_dumpblocks = raid_dumpblocks,
	.d_lastclose = raid_lastclose,
	.d_minphys = minphys
};

#define raidunit(x)	DISKUNIT(x)
#define raidsoftc(dev)	(((struct raid_softc *)device_private(dev))->sc_r.softc)

extern struct cfdriver raid_cd;
CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
    raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

/* Internal representation of a rf_recon_req */
struct rf_recon_req_internal {
	RF_RowCol_t col;
	RF_ReconReqFlags_t flags;
	void   *raidPtr;
};

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even it if is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */

static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static int raid_detach_unlocked(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);

static void rf_ReconThread(struct rf_recon_req_internal *);
static void rf_RewriteParityThread(RF_Raid_t *raidPtr);
static void rf_CopybackThread(RF_Raid_t *raidPtr);
static void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
static int rf_autoconfig(device_t);
static int rf_rescan(void);
static void rf_buildroothack(RF_ConfigSet_t *);

static RF_AutoConfig_t *rf_find_raid_components(void);
static RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
static void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *,
    RF_Raid_t *);
static int rf_set_autoconfig(RF_Raid_t *, int);
static int rf_set_rootpartition(RF_Raid_t *, int);
static void rf_release_all_vps(RF_ConfigSet_t *);
static void rf_cleanup_config_set(RF_ConfigSet_t *);
static int rf_have_enough_components(RF_ConfigSet_t *);
static struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);

/*
 * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
 * Note that this is overridden by having RAID_AUTOCONFIG as an option
 * in the kernel config file.
 */
#ifdef RAID_AUTOCONFIG
int raidautoconfig = 1;
#else
int raidautoconfig = 0;
#endif
static bool raidautoconfigdone = false;

struct pool rf_alloclist_pool;  /* AllocList */

/* Global list of all configured raid_softc's, protected by raid_lock. */
static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
static kmutex_t raid_lock;

/* Allocate and minimally initialize a softc for unit `unit'. */
static struct raid_softc *
raidcreate(int unit) {
	struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
	sc->sc_unit = unit;
	cv_init(&sc->sc_cv, "raidunit");
	mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
	return sc;
}

/* Tear down what raidcreate() set up. */
static void
raiddestroy(struct raid_softc *sc) {
	cv_destroy(&sc->sc_cv);
	mutex_destroy(&sc->sc_mutex);
	kmem_free(sc, sizeof(*sc));
}

/*
 * Look up the softc for `unit' on the global list; optionally create
 * and enlist a fresh one when not found and `create' is true.
 * Returns NULL for a negative unit (panics under DIAGNOSTIC) or when
 * the unit is absent and create is false.
 */
static struct raid_softc *
raidget(int unit, bool create) {
	struct raid_softc *sc;
	if (unit < 0) {
#ifdef DIAGNOSTIC
		panic("%s: unit %d!", __func__, unit);
#endif
		return NULL;
	}
	mutex_enter(&raid_lock);
	LIST_FOREACH(sc, &raids, sc_link) {
		if (sc->sc_unit == unit) {
			mutex_exit(&raid_lock);
			return sc;
		}
	}
	mutex_exit(&raid_lock);
	if (!create)
		return NULL;
	sc = raidcreate(unit);
	mutex_enter(&raid_lock);
	LIST_INSERT_HEAD(&raids, sc, sc_link);
	mutex_exit(&raid_lock);
	return sc;
}

/* Unlink a softc from the global list and free it. */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}

void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}

/*
 * One-shot boot-time autoconfiguration: find components, sort them
 * into sets, and configure/root-select via rf_buildroothack().
 * Returns 0 when autoconfig is disabled or already done, 1 otherwise.
 */
static int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return 0;

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}

int
rf_inited(const struct raid_softc *rs) {
	return (rs->sc_flags & RAIDF_INITED) != 0;
}

RF_Raid_t *
rf_get_raid(struct raid_softc *rs) {
	return &rs->sc_r;
}

int
rf_get_unit(const struct raid_softc *rs) {
	return rs->sc_unit;
}

/*
 * Does the set described by `r' contain the device `bdv'?
 * Compares component device names (with any "dk" wedge mapped back to
 * its parent disk) against the xname of bdv.  Returns 1 on match.
 */
static int
rf_containsboot(RF_Raid_t *r, device_t bdv) {
	const char *bootname;
	size_t len;

	/* if bdv is NULL, the set can't contain it. exit early. */
	if (bdv == NULL)
		return 0;

	bootname = device_xname(bdv);
	len = strlen(bootname);

	for (int col = 0; col < r->numCol; col++) {
		const char *devname = r->Disks[col].devname;
		/* skip the "/dev/" prefix of the stored component path */
		devname += sizeof("/dev/") - 1;
		if (strncmp(devname, "dk", 2) == 0) {
			const char *parent =
			    dkwedge_get_parent_name(r->Disks[col].dev);
			if (parent != NULL)
				devname = parent;
		}
		if (strncmp(devname, bootname, len) == 0) {
			struct raid_softc *sc = r->softc;
			aprint_debug("raid%d includes boot device %s\n",
			    sc->sc_unit, devname);
			return 1;
		}
	}
	return 0;
}

/*
 * Re-scan the system for autoconfigurable RAID sets (e.g. after a
 * hot-plug event).  Loops so that RAID sets layered on newly
 * configured RAID sets are also picked up.  Always returns 0.
 */
static int
rf_rescan(void)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets, *cset, *next_cset;
	struct raid_softc *sc;
	int raid_added;

	ac_list = rf_find_raid_components();
	config_sets = rf_create_auto_sets(ac_list);

	raid_added = 1;
	while (raid_added > 0) {
		raid_added = 0;
		cset = config_sets;
		while (cset != NULL) {
			next_cset = cset->next;
			if (rf_have_enough_components(cset) &&
			    cset->ac->clabel->autoconfigure == 1) {
				sc = rf_auto_config_set(cset);
				if (sc != NULL) {
					aprint_debug("raid%d: configured ok, rootable %d\n",
					    sc->sc_unit, cset->rootable);
					/* We added one RAID set */
					raid_added++;
				} else {
					/* The autoconfig didn't work :( */
					aprint_debug("Autoconfig failed\n");
					rf_release_all_vps(cset);
				}
			} else {
				/* we're not autoconfiguring this set...
				   release the associated resources */
				rf_release_all_vps(cset);
			}
			/* cleanup */
			rf_cleanup_config_set(cset);
			cset = next_cset;
		}
		if (raid_added > 0) {
			/* We added at least one RAID set, so re-scan
			   for recursive RAID */
			ac_list = rf_find_raid_components();
			config_sets = rf_create_auto_sets(ac_list);
		}
	}

	return 0;
}


/*
 * Configure every eligible set (same loop as rf_rescan(), but also
 * tracking sets marked rootable), then try to pick the root device.
 */
static void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;
	int raid_added;
	struct raid_softc *sc, *rsc;
	struct dk_softc *dksc = NULL;	/* XXX gcc -Os: may be used uninit. */

	sc = rsc = NULL;
	num_root = 0;

	raid_added = 1;
	while (raid_added > 0) {
		raid_added = 0;
		cset = config_sets;
		while (cset != NULL) {
			next_cset = cset->next;
			if (rf_have_enough_components(cset) &&
			    cset->ac->clabel->autoconfigure == 1) {
				sc = rf_auto_config_set(cset);
				if (sc != NULL) {
					aprint_debug("raid%d: configured ok, rootable %d\n",
					    sc->sc_unit, cset->rootable);
					/* We added one RAID set */
					raid_added++;
					if (cset->rootable) {
						rsc = sc;
						num_root++;
					}
				} else {
					/* The autoconfig didn't work :( */
					aprint_debug("Autoconfig failed\n");
					rf_release_all_vps(cset);
				}
			} else {
				/* we're not autoconfiguring this set...
				   release the associated resources */
				rf_release_all_vps(cset);
			}
			/* cleanup */
			rf_cleanup_config_set(cset);
			cset = next_cset;
		}
		if (raid_added > 0) {
			/* We added at least one RAID set, so re-scan
			   for recursive RAID */
			ac_list = rf_find_raid_components();
			config_sets = rf_create_auto_sets(ac_list);
		}
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto...
*/ 583 584 if (rootspec != NULL) { 585 aprint_debug("%s: rootspec %s\n", __func__, rootspec); 586 return; 587 } 588 589 /* we found something bootable... */ 590 591 /* 592 * XXX: The following code assumes that the root raid 593 * is the first ('a') partition. This is about the best 594 * we can do with a BSD disklabel, but we might be able 595 * to do better with a GPT label, by setting a specified 596 * attribute to indicate the root partition. We can then 597 * stash the partition number in the r->root_partition 598 * high bits (the bottom 2 bits are already used). For 599 * now we just set booted_partition to 0 when we override 600 * root. 601 */ 602 if (num_root == 1) { 603 device_t candidate_root; 604 dksc = &rsc->sc_dksc; 605 if (dksc->sc_dkdev.dk_nwedges != 0) { 606 char cname[sizeof(cset->ac->devname)]; 607 /* XXX: assume partition 'a' first */ 608 snprintf(cname, sizeof(cname), "%s%c", 609 device_xname(dksc->sc_dev), 'a'); 610 candidate_root = dkwedge_find_by_wname(cname); 611 aprint_debug("%s: candidate wedge root=%s\n", __func__, 612 cname); 613 if (candidate_root == NULL) { 614 /* 615 * If that is not found, because we don't use 616 * disklabel, return the first dk child 617 * XXX: we can skip the 'a' check above 618 * and always do this... 619 */ 620 size_t i = 0; 621 candidate_root = dkwedge_find_by_parent( 622 device_xname(dksc->sc_dev), &i); 623 } 624 aprint_debug("%s: candidate wedge root=%p\n", __func__, 625 candidate_root); 626 } else 627 candidate_root = dksc->sc_dev; 628 aprint_debug("%s: candidate root=%p booted_device=%p " 629 "root_partition=%d contains_boot=%d\n", 630 __func__, candidate_root, booted_device, 631 rsc->sc_r.root_partition, 632 rf_containsboot(&rsc->sc_r, booted_device)); 633 /* XXX the check for booted_device == NULL can probably be 634 * dropped, now that rf_containsboot handles that case. 
635 */ 636 if (booted_device == NULL || 637 rsc->sc_r.root_partition == 1 || 638 rf_containsboot(&rsc->sc_r, booted_device)) { 639 booted_device = candidate_root; 640 booted_method = "raidframe/single"; 641 booted_partition = 0; /* XXX assume 'a' */ 642 aprint_debug("%s: set booted_device=%s(%p)\n", __func__, 643 device_xname(booted_device), booted_device); 644 } 645 } else if (num_root > 1) { 646 aprint_debug("%s: many roots=%d, %p\n", __func__, num_root, 647 booted_device); 648 649 /* 650 * Maybe the MD code can help. If it cannot, then 651 * setroot() will discover that we have no 652 * booted_device and will ask the user if nothing was 653 * hardwired in the kernel config file 654 */ 655 if (booted_device == NULL) 656 return; 657 658 num_root = 0; 659 mutex_enter(&raid_lock); 660 LIST_FOREACH(sc, &raids, sc_link) { 661 RF_Raid_t *r = &sc->sc_r; 662 if (r->valid == 0) 663 continue; 664 665 if (r->root_partition == 0) 666 continue; 667 668 if (rf_containsboot(r, booted_device)) { 669 num_root++; 670 rsc = sc; 671 dksc = &rsc->sc_dksc; 672 } 673 } 674 mutex_exit(&raid_lock); 675 676 if (num_root == 1) { 677 booted_device = dksc->sc_dev; 678 booted_method = "raidframe/multi"; 679 booted_partition = 0; /* XXX assume 'a' */ 680 } else { 681 /* we can't guess.. require the user to answer... 
*/ 682 boothowto |= RB_ASKNAME; 683 } 684 } 685 } 686 687 static int 688 raidsize(dev_t dev) 689 { 690 struct raid_softc *rs; 691 struct dk_softc *dksc; 692 unsigned int unit; 693 694 unit = raidunit(dev); 695 if ((rs = raidget(unit, false)) == NULL) 696 return -1; 697 dksc = &rs->sc_dksc; 698 699 if ((rs->sc_flags & RAIDF_INITED) == 0) 700 return -1; 701 702 return dk_size(dksc, dev); 703 } 704 705 static int 706 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size) 707 { 708 unsigned int unit; 709 struct raid_softc *rs; 710 struct dk_softc *dksc; 711 712 unit = raidunit(dev); 713 if ((rs = raidget(unit, false)) == NULL) 714 return ENXIO; 715 dksc = &rs->sc_dksc; 716 717 if ((rs->sc_flags & RAIDF_INITED) == 0) 718 return ENODEV; 719 720 /* 721 Note that blkno is relative to this particular partition. 722 By adding adding RF_PROTECTED_SECTORS, we get a value that 723 is relative to the partition used for the underlying component. 724 */ 725 blkno += RF_PROTECTED_SECTORS; 726 727 return dk_dump(dksc, dev, blkno, va, size, DK_DUMP_RECURSIVE); 728 } 729 730 static int 731 raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk) 732 { 733 struct raid_softc *rs = raidsoftc(dev); 734 const struct bdevsw *bdev; 735 RF_Raid_t *raidPtr; 736 int c, sparecol, j, scol, dumpto; 737 int error = 0; 738 739 raidPtr = &rs->sc_r; 740 741 /* we only support dumping to RAID 1 sets */ 742 if (raidPtr->Layout.numDataCol != 1 || 743 raidPtr->Layout.numParityCol != 1) 744 return EINVAL; 745 746 if ((error = raidlock(rs)) != 0) 747 return error; 748 749 /* figure out what device is alive.. */ 750 751 /* 752 Look for a component to dump to. 
	   The preference for the
	   component to dump to is as follows:
	   1) the first component
	   2) a used_spare of the first component
	   3) the second component
	   4) a used_spare of the second component
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live component.
	   If we didn't find a live component, we now check to see
	   if there is a relevant spared component.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared first
				   component!  We'll take that over
				   anything else found so far.  (We
				   couldn't have found a real first
				   component before, since this is a
				   used spare, and it's saying that
				   it's replacing the first
				   component.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the first
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared second component.
				   We'll dump to that if we haven't found
				   anything else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	/* hand the dump straight to the chosen component's driver */
	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}

/* ARGSUSED */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	int error = 0;
	int part, pmask;

	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return error;

	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	dksc = &rs->sc_dksc;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (!DK_BUSY(dksc, pmask) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}

	if ((rs->sc_flags & RAIDF_INITED) != 0)
		error = dk_open(dksc, dev, flags, fmt, l);

bad:
	raidunlock(rs);

	return error;


}

/* dkdriver d_lastclose hook: called when the final opener goes away. */
static int
raid_lastclose(device_t self)
{
	struct raid_softc *rs = raidsoftc(self);

	/* Last one... device is not unconfigured yet.
	   Device shutdown has taken care of setting the
	   clean bits if RAIDF_INITED is not set
	   mark things as clean... */

	rf_update_component_labels(&rs->sc_r,
	    RF_FINAL_COMPONENT_UPDATE);

	/* pass to unlocked code */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		rs->sc_flags |= RAIDF_DETACH;

	return 0;
}

/* ARGSUSED */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	cfdata_t cf;
	int error = 0, do_detach = 0, do_put = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((error = raidlock(rs)) != 0)
		return error;

	if ((rs->sc_flags & RAIDF_INITED) != 0) {
		error = dk_close(dksc, dev, flags, fmt, l);
		if ((rs->sc_flags & RAIDF_DETACH) != 0)
			do_detach = 1;
	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		do_put = 1;

	raidunlock(rs);

	/* detach/put are performed after dropping the raid lock */
	if (do_detach) {
		/* free the pseudo device attach bits */
		cf = device_cfdata(dksc->sc_dev);
		error = config_detach(dksc->sc_dev, 0);
		if (error == 0)
			free(cf, M_RAIDFRAME);
	} else if (do_put) {
		raidput(rs);
	}

	return error;

}

/* Kick the iodone thread so that queued work gets (re)considered. */
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}

static void
raidstrategy(struct buf *bp)
{
	unsigned int unit;
	struct raid_softc *rs;
	struct dk_softc *dksc;
	RF_Raid_t *raidPtr;

	unit = raidunit(bp->b_dev);
	if ((rs = raidget(unit, false)) == NULL) {
		bp->b_error = ENXIO;
		goto fail;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto fail;
	}
	dksc = &rs->sc_dksc;
	raidPtr = &rs->sc_r;

	/* Queue IO only */
	if (dk_strategy_defer(dksc, bp))
		goto done;

	/* schedule the IO to happen at the next convenient time */
	raid_wakeup(raidPtr);

done:
	return;

fail:
bp->b_resid = bp->b_bcount; 986 biodone(bp); 987 } 988 989 static int 990 raid_diskstart(device_t dev, struct buf *bp) 991 { 992 struct raid_softc *rs = raidsoftc(dev); 993 RF_Raid_t *raidPtr; 994 995 raidPtr = &rs->sc_r; 996 if (!raidPtr->valid) { 997 db1_printf(("raid is not valid..\n")); 998 return ENODEV; 999 } 1000 1001 /* XXX */ 1002 bp->b_resid = 0; 1003 1004 return raiddoaccess(raidPtr, bp); 1005 } 1006 1007 void 1008 raiddone(RF_Raid_t *raidPtr, struct buf *bp) 1009 { 1010 struct raid_softc *rs; 1011 struct dk_softc *dksc; 1012 1013 rs = raidPtr->softc; 1014 dksc = &rs->sc_dksc; 1015 1016 dk_done(dksc, bp); 1017 1018 rf_lock_mutex2(raidPtr->mutex); 1019 raidPtr->openings++; 1020 rf_unlock_mutex2(raidPtr->mutex); 1021 1022 /* schedule more IO */ 1023 raid_wakeup(raidPtr); 1024 } 1025 1026 /* ARGSUSED */ 1027 static int 1028 raidread(dev_t dev, struct uio *uio, int flags) 1029 { 1030 int unit = raidunit(dev); 1031 struct raid_softc *rs; 1032 1033 if ((rs = raidget(unit, false)) == NULL) 1034 return ENXIO; 1035 1036 if ((rs->sc_flags & RAIDF_INITED) == 0) 1037 return ENXIO; 1038 1039 return physio(raidstrategy, NULL, dev, B_READ, minphys, uio); 1040 1041 } 1042 1043 /* ARGSUSED */ 1044 static int 1045 raidwrite(dev_t dev, struct uio *uio, int flags) 1046 { 1047 int unit = raidunit(dev); 1048 struct raid_softc *rs; 1049 1050 if ((rs = raidget(unit, false)) == NULL) 1051 return ENXIO; 1052 1053 if ((rs->sc_flags & RAIDF_INITED) == 0) 1054 return ENXIO; 1055 1056 return physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio); 1057 1058 } 1059 1060 static int 1061 raid_detach_unlocked(struct raid_softc *rs) 1062 { 1063 struct dk_softc *dksc = &rs->sc_dksc; 1064 RF_Raid_t *raidPtr; 1065 int error; 1066 1067 raidPtr = &rs->sc_r; 1068 1069 if (DK_BUSY(dksc, 0) || 1070 raidPtr->recon_in_progress != 0 || 1071 raidPtr->parity_rewrite_in_progress != 0 || 1072 raidPtr->copyback_in_progress != 0) 1073 return EBUSY; 1074 1075 if ((rs->sc_flags & RAIDF_INITED) == 0) 1076 
return 0; 1077 1078 rs->sc_flags &= ~RAIDF_SHUTDOWN; 1079 1080 if ((error = rf_Shutdown(raidPtr)) != 0) 1081 return error; 1082 1083 rs->sc_flags &= ~RAIDF_INITED; 1084 1085 /* Kill off any queued buffers */ 1086 dk_drain(dksc); 1087 bufq_free(dksc->sc_bufq); 1088 1089 /* Detach the disk. */ 1090 dkwedge_delall(&dksc->sc_dkdev); 1091 disk_detach(&dksc->sc_dkdev); 1092 disk_destroy(&dksc->sc_dkdev); 1093 dk_detach(dksc); 1094 1095 return 0; 1096 } 1097 1098 int 1099 rf_fail_disk(RF_Raid_t *raidPtr, struct rf_recon_req *rr) 1100 { 1101 struct rf_recon_req_internal *rrint; 1102 1103 if (raidPtr->Layout.map->faultsTolerated == 0) { 1104 /* Can't do this on a RAID 0!! */ 1105 return EINVAL; 1106 } 1107 1108 if (rr->col < 0 || rr->col >= raidPtr->numCol) { 1109 /* bad column */ 1110 return EINVAL; 1111 } 1112 1113 rf_lock_mutex2(raidPtr->mutex); 1114 if (raidPtr->status == rf_rs_reconstructing) { 1115 /* you can't fail a disk while we're reconstructing! */ 1116 /* XXX wrong for RAID6 */ 1117 goto out; 1118 } 1119 if ((raidPtr->Disks[rr->col].status == rf_ds_optimal) && 1120 (raidPtr->numFailures > 0)) { 1121 /* some other component has failed. Let's not make 1122 things worse. XXX wrong for RAID6 */ 1123 goto out; 1124 } 1125 if (raidPtr->Disks[rr->col].status == rf_ds_spared) { 1126 /* Can't fail a spared disk! 
		 */
		goto out;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	/* make a copy of the recon request so that we don't rely on
	 * the user's buffer */
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return(ENOMEM);
	rrint->col = rr->col;
	rrint->flags = rr->flags;
	rrint->raidPtr = raidPtr;

	return RF_CREATE_THREAD(raidPtr->recon_thread, rf_ReconThread,
	    rrint, "raid_recon");
out:
	rf_unlock_mutex2(raidPtr->mutex);
	return EINVAL;
}

/*
 * Replace k_cfg->layoutSpecific (a user-space pointer) with a kernel
 * copy of the layout-specific data.  On success the caller owns the
 * kernel buffer and must RF_Free() layoutSpecificSize bytes.
 */
static int
rf_copyinspecificbuf(RF_Config_t *k_cfg)
{
	/* allocate a buffer for the layout-specific data, and copy it in */
	if (k_cfg->layoutSpecificSize == 0)
		return 0;

	if (k_cfg->layoutSpecificSize > 10000) {
		/* sanity check */
		return EINVAL;
	}

	u_char *specific_buf;
	specific_buf = RF_Malloc(k_cfg->layoutSpecificSize);
	if (specific_buf == NULL)
		return ENOMEM;

	int retcode = copyin(k_cfg->layoutSpecific, specific_buf,
	    k_cfg->layoutSpecificSize);
	if (retcode) {
		RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		db1_printf(("%s: retcode=%d copyin.2\n", __func__, retcode));
		return retcode;
	}

	/* from here on layoutSpecific points at kernel memory */
	k_cfg->layoutSpecific = specific_buf;
	return 0;
}

/*
 * Copy a RF_Config_t in from user space.  'data' holds a user pointer
 * to the config.  On success *k_cfg is a kernel allocation the caller
 * must free; on failure the unit is flagged for detach-on-close.
 */
static int
rf_getConfiguration(struct raid_softc *rs, void *data, RF_Config_t **k_cfg)
{
	RF_Config_t *u_cfg = *((RF_Config_t **) data);

	if (rs->sc_r.valid) {
		/* There is a valid RAID set running on this unit!
		 */
		printf("raid%d: Device already configured!\n", rs->sc_unit);
		return EINVAL;
	}

	/* copy-in the configuration information */
	/* data points to a pointer to the configuration structure */
	*k_cfg = RF_Malloc(sizeof(**k_cfg));
	if (*k_cfg == NULL) {
		return ENOMEM;
	}
	int retcode = copyin(u_cfg, *k_cfg, sizeof(RF_Config_t));
	if (retcode == 0)
		return 0;
	/* copyin failed: free the config and arrange for detach on close */
	RF_Free(*k_cfg, sizeof(RF_Config_t));
	db1_printf(("%s: retcode=%d copyin.1\n", __func__, retcode));
	rs->sc_flags |= RAIDF_SHUTDOWN;
	return retcode;
}

/*
 * Configure the RAID set described by k_cfg (already copied into
 * kernel space).  Consumes k_cfg: it is always freed before return.
 * On failure the unit is marked RAIDF_SHUTDOWN so close detaches it.
 */
int
rf_construct(struct raid_softc *rs, RF_Config_t *k_cfg)
{
	int retcode, i;
	RF_Raid_t *raidPtr = &rs->sc_r;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((retcode = rf_copyinspecificbuf(k_cfg)) != 0)
		goto out;

	/* should do some kind of sanity check on the configuration.
	 * Store the sum of all the bytes in the last byte? */

	/* Force nul-termination on all strings. */
#define ZERO_FINAL(s) do { s[sizeof(s) - 1] = '\0'; } while (0)
	for (i = 0; i < RF_MAXCOL; i++) {
		ZERO_FINAL(k_cfg->devnames[0][i]);
	}
	for (i = 0; i < RF_MAXSPARE; i++) {
		ZERO_FINAL(k_cfg->spare_names[i]);
	}
	for (i = 0; i < RF_MAXDBGV; i++) {
		ZERO_FINAL(k_cfg->debugVars[i]);
	}
#undef ZERO_FINAL

	/* Check some basic limits.
	 */
	if (k_cfg->numCol >= RF_MAXCOL || k_cfg->numCol < 0) {
		retcode = EINVAL;
		goto out;
	}
	if (k_cfg->numSpare >= RF_MAXSPARE || k_cfg->numSpare < 0) {
		retcode = EINVAL;
		goto out;
	}

	/* configure the system */

	/*
	 * Clear the entire RAID descriptor, just to make sure
	 * there is no stale data left in the case of a
	 * reconfiguration
	 */
	memset(raidPtr, 0, sizeof(*raidPtr));
	raidPtr->softc = rs;
	raidPtr->raidid = rs->sc_unit;

	retcode = rf_Configure(raidPtr, k_cfg, NULL);

	if (retcode == 0) {
		/* allow this many simultaneous IO's to
		   this RAID device */
		raidPtr->openings = RAIDOUTSTANDING;

		raidinit(rs);
		raid_wakeup(raidPtr);
		rf_markalldirty(raidPtr);
	}

	/* free the buffers.  No return code here. */
	if (k_cfg->layoutSpecificSize) {
		RF_Free(k_cfg->layoutSpecific, k_cfg->layoutSpecificSize);
	}
out:
	RF_Free(k_cfg, sizeof(RF_Config_t));
	if (retcode) {
		/*
		 * If configuration failed, set sc_flags so that we
		 * will detach the device when we close it.
		 */
		rs->sc_flags |= RAIDF_SHUTDOWN;
	}
	return retcode;
}

#if RF_DISABLED
/*
 * (Disabled.)  Overwrite one column's component label with a
 * user-supplied label.  Currently only validates the column number;
 * see the XXX notes below.
 */
static int
rf_set_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{

	/* XXX check the label for valid stuff... */
	/* Note that some things *should not* get modified --
	   the user should be re-initing the labels instead of
	   trying to patch things.
	 */
#ifdef DEBUG
	int raidid = raidPtr->raidid;
	printf("raid%d: Got component label:\n", raidid);
	printf("raid%d: Version: %d\n", raidid, clabel->version);
	printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
	printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
	printf("raid%d: Column: %d\n", raidid, clabel->column);
	printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
	printf("raid%d: Clean: %d\n", raidid, clabel->clean);
	printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif	/* DEBUG */
	clabel->row = 0;
	int column = clabel->column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return(EINVAL);
	}

	/* XXX this isn't allowed to do anything for now :-) */

	/* XXX and before it is, we need to fill in the rest
	   of the fields!?!?!?! */
	memcpy(raidget_component_label(raidPtr, column),
	    clabel, sizeof(*clabel));
	raidflush_component_label(raidPtr, column);
	return 0;
}
#endif

/*
 * RAIDFRAME_INIT_LABELS: write fresh component labels to every live
 * component.  Only the serial number is taken from the user's label;
 * everything else comes from the active configuration.
 */
static int
rf_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/*
	   we only want the serial number from
	   the above.  We get all the rest of the information
	   from the config that was used to create this RAID
	   set.
	 */

	raidPtr->serial_number = clabel->serial_number;

	for (int column = 0; column < raidPtr->numCol; column++) {
		RF_RaidDisk_t *diskPtr = &raidPtr->Disks[column];
		if (RF_DEAD_DISK(diskPtr->status))
			continue;
		RF_ComponentLabel_t *ci_label = raidget_component_label(
		    raidPtr, column);
		/* Zeroing this is important.
		 */
		memset(ci_label, 0, sizeof(*ci_label));
		raid_init_component_label(raidPtr, ci_label);
		ci_label->serial_number = raidPtr->serial_number;
		ci_label->row = 0; /* we dont' pretend to support more */
		rf_component_label_set_partitionsize(ci_label,
		    diskPtr->partitionSize);
		ci_label->column = column;
		raidflush_component_label(raidPtr, column);
		/* XXXjld what about the spares? */
	}

	return 0;
}

/*
 * RAIDFRAME_REBUILD_IN_PLACE: reconstruct a failed component back
 * onto itself.  Validates RAID level, column and component state,
 * then fires off rf_ReconstructInPlaceThread asynchronously.
 */
static int
rf_rebuild_in_place(RF_Raid_t *raidPtr, RF_SingleComponent_t *componentPtr)
{

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (raidPtr->recon_in_progress == 1) {
		/* a reconstruct is already in progress! */
		return EINVAL;
	}

	RF_SingleComponent_t component;
	memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
	component.row = 0; /* we don't support any more */
	int column = component.column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return EINVAL;
	}

	rf_lock_mutex2(raidPtr->mutex);
	if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* XXX 0 above shouldn't be constant!!! */
		/* some component other than this has failed.
		   Let's not make things worse than they already
		   are...
		 */
		printf("raid%d: Unable to reconstruct to disk at:\n",
		    raidPtr->raidid);
		printf("raid%d: Col: %d Too many failures.\n",
		    raidPtr->raidid, column);
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_reconstructing) {
		printf("raid%d: Unable to reconstruct to disk at:\n",
		    raidPtr->raidid);
		printf("raid%d: Col: %d "
		    "Reconstruction already occurring!\n",
		    raidPtr->raidid, column);

		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_spared) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	rf_unlock_mutex2(raidPtr->mutex);

	/* the request outlives this ioctl; give the recon thread its
	 * own copy */
	struct rf_recon_req_internal *rrint;
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return ENOMEM;

	rrint->col = column;
	rrint->raidPtr = raidPtr;

	return RF_CREATE_THREAD(raidPtr->recon_thread,
	    rf_ReconstructInPlaceThread, rrint, "raid_reconip");
}

/*
 * RAIDFRAME_CHECK_RECON_STATUS: report reconstruction progress as a
 * percentage in *data; 100 means "done or not applicable".
 */
static int
rf_check_recon_status(RF_Raid_t *raidPtr, int *data)
{
	/*
	 * This makes no sense on a RAID 0, or if we are not reconstructing
	 * so tell the user it's done.
	 */
	if (raidPtr->Layout.map->faultsTolerated == 0 ||
	    raidPtr->status != rf_rs_reconstructing) {
		*data = 100;
		return 0;
	}
	/* guard the division below */
	if (raidPtr->reconControl->numRUsTotal == 0) {
		*data = 0;
		return 0;
	}
	*data = (raidPtr->reconControl->numRUsComplete * 100
	    / raidPtr->reconControl->numRUsTotal);
	return 0;
}

/*
 * Copy a RF_SingleComponent_t from 'data', ensuring nul-termination
 * on the component_name[] array.
 */
static void
rf_copy_single_component(RF_SingleComponent_t *component, void *data)
{

	memcpy(component, data, sizeof *component);
	/* never trust a string from user space to be terminated */
	component->component_name[sizeof(component->component_name) - 1] = '\0';
}

/*
 * Character-device ioctl entry point for raid(4).  RAIDframe-specific
 * commands are handled in the big switch below; anything else falls
 * through to the compat hooks and finally the generic dk(4) ioctls.
 */
static int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = raidunit(dev);
	int part, pmask;
	struct raid_softc *rs;
	struct dk_softc *dksc;
	RF_Config_t *k_cfg;
	RF_Raid_t *raidPtr;
	RF_AccTotals_t *totals;
	RF_SingleComponent_t component;
	RF_DeviceConfig_t *d_cfg, *ucfgp;
	int retcode = 0;
	int column;
	RF_ComponentLabel_t *clabel;
	int d;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	dksc = &rs->sc_dksc;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
	    (int) DISKPART(dev), (int) unit, cmd));

	/* Only CONFIGURE and RESCAN can be done without the RAID being initialized.
	 */
	switch (cmd) {
	case RAIDFRAME_CONFIGURE:
	case RAIDFRAME_RESCAN:
		break;
	default:
		if (!rf_inited(rs))
			return ENXIO;
	}

	switch (cmd) {
		/* configure the system */
	case RAIDFRAME_CONFIGURE:
		if ((retcode = rf_getConfiguration(rs, data, &k_cfg)) != 0)
			return retcode;
		return rf_construct(rs, k_cfg);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((retcode = raidlock(rs)) != 0)
			return retcode;

		/* refuse while open or while background threads run */
		if (DK_BUSY(dksc, pmask) ||
		    raidPtr->recon_in_progress != 0 ||
		    raidPtr->parity_rewrite_in_progress != 0 ||
		    raidPtr->copyback_in_progress != 0)
			retcode = EBUSY;
		else {
			/* detach and free on close */
			rs->sc_flags |= RAIDF_SHUTDOWN;
			retcode = 0;
		}

		raidunlock(rs);

		return retcode;
	case RAIDFRAME_GET_COMPONENT_LABEL:
		return rf_get_component_label(raidPtr, data);

#if RF_DISABLED
	case RAIDFRAME_SET_COMPONENT_LABEL:
		return rf_set_component_label(raidPtr, data);
#endif

	case RAIDFRAME_INIT_LABELS:
		return rf_init_component_label(raidPtr, data);

	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return retcode;

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return retcode;

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return 0;
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return EINVAL;
		}

		return RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
		    rf_RewriteParityThread, raidPtr,"raid_parity");

	case RAIDFRAME_ADD_HOT_SPARE:
		rf_copy_single_component(&component, data);
		return rf_add_hot_spare(raidPtr, &component);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		/* not implemented: accepted and ignored */
		return retcode;

	case RAIDFRAME_DELETE_COMPONENT:
		rf_copy_single_component(&component, data);
		return rf_delete_component(raidPtr, &component);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		rf_copy_single_component(&component, data);
		return rf_incorporate_hot_spare(raidPtr, &component);

	case RAIDFRAME_REBUILD_IN_PLACE:
		return rf_rebuild_in_place(raidPtr, data);

	case RAIDFRAME_GET_INFO:
		/* *data is a user pointer to receive the device config */
		ucfgp = *(RF_DeviceConfig_t **)data;
		d_cfg = RF_Malloc(sizeof(*d_cfg));
		if (d_cfg == NULL)
			return ENOMEM;
		retcode = rf_get_info(raidPtr, d_cfg);
		if (retcode == 0) {
			retcode = copyout(d_cfg, ucfgp, sizeof(*d_cfg));
		}
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
		return retcode;

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return 0;

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map, data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (rf_paritymap_set_params(raidPtr->parity_map, data, 1) != 0)
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESCAN:
		return rf_rescan();

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return 0;

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return 0;

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return 0;

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return 0;

	case RAIDFRAME_FAIL_DISK:
		return rf_fail_disk(raidPtr, data);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return EINVAL;
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return EINVAL;
		}

		return RF_CREATE_THREAD(raidPtr->copyback_thread,
		    rf_CopybackThread, raidPtr, "raid_copyback");

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		return rf_check_recon_status(raidPtr, data);

	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		rf_check_recon_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return 0;
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
			    raidPtr->parity_rewrite_stripes_done /
			    raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return 0;

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		rf_check_parityrewrite_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return 0;
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
			    raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return 0;

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		rf_check_copyback_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_SET_LAST_UNIT:
		/* only allowed while every component is optimal */
		for (column = 0; column < raidPtr->numCol; column++)
			if (raidPtr->Disks[column].status != rf_ds_optimal)
				return EBUSY;

		for (column = 0; column < raidPtr->numCol; column++) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->last_unit = *(int *)data;
			raidflush_component_label(raidPtr, column);
		}
		rs->sc_cflags |= RAIDF_UNIT_CHANGED;
		return 0;

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table -- XXX */
#if RF_DISABLED
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		RF_SparetWait_t *waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return 0;

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the dameon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		waitreq = RF_Malloc(sizeof(*waitreq));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return 0;

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.
the return status of the spare 1762 * table installation is passed in the "fcol" field */ 1763 waitred = RF_Malloc(sizeof(*waitreq)); 1764 waitreq->fcol = retcode; 1765 rf_lock_mutex2(rf_sparet_wait_mutex); 1766 waitreq->next = rf_sparet_resp_queue; 1767 rf_sparet_resp_queue = waitreq; 1768 rf_broadcast_cond2(rf_sparet_resp_cv); 1769 rf_unlock_mutex2(rf_sparet_wait_mutex); 1770 1771 return retcode; 1772 #endif 1773 default: 1774 /* 1775 * Don't bother trying to load compat modules 1776 * if it is not our ioctl. This is more efficient 1777 * and makes rump tests not depend on compat code 1778 */ 1779 if (IOCGROUP(cmd) != 'r') 1780 break; 1781 #ifdef _LP64 1782 if ((l->l_proc->p_flag & PK_32) != 0) { 1783 module_autoload("compat_netbsd32_raid", 1784 MODULE_CLASS_EXEC); 1785 MODULE_HOOK_CALL(raidframe_netbsd32_ioctl_hook, 1786 (rs, cmd, data), enosys(), retcode); 1787 if (retcode != EPASSTHROUGH) 1788 return retcode; 1789 } 1790 #endif 1791 module_autoload("compat_raid_80", MODULE_CLASS_EXEC); 1792 MODULE_HOOK_CALL(raidframe_ioctl_80_hook, 1793 (rs, cmd, data), enosys(), retcode); 1794 if (retcode != EPASSTHROUGH) 1795 return retcode; 1796 1797 module_autoload("compat_raid_50", MODULE_CLASS_EXEC); 1798 MODULE_HOOK_CALL(raidframe_ioctl_50_hook, 1799 (rs, cmd, data), enosys(), retcode); 1800 if (retcode != EPASSTHROUGH) 1801 return retcode; 1802 break; /* fall through to the os-specific code below */ 1803 1804 } 1805 1806 if (!raidPtr->valid) 1807 return EINVAL; 1808 1809 /* 1810 * Add support for "regular" device ioctls here. 
1811 */ 1812 1813 switch (cmd) { 1814 case DIOCGCACHE: 1815 retcode = rf_get_component_caches(raidPtr, (int *)data); 1816 break; 1817 1818 case DIOCCACHESYNC: 1819 retcode = rf_sync_component_caches(raidPtr, *(int *)data); 1820 break; 1821 1822 default: 1823 retcode = dk_ioctl(dksc, dev, cmd, data, flag, l); 1824 break; 1825 } 1826 1827 return retcode; 1828 1829 } 1830 1831 1832 /* raidinit -- complete the rest of the initialization for the 1833 RAIDframe device. */ 1834 1835 1836 static void 1837 raidinit(struct raid_softc *rs) 1838 { 1839 cfdata_t cf; 1840 unsigned int unit; 1841 struct dk_softc *dksc = &rs->sc_dksc; 1842 RF_Raid_t *raidPtr = &rs->sc_r; 1843 device_t dev; 1844 1845 unit = raidPtr->raidid; 1846 1847 /* XXX doesn't check bounds. */ 1848 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit); 1849 1850 /* attach the pseudo device */ 1851 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK); 1852 cf->cf_name = raid_cd.cd_name; 1853 cf->cf_atname = raid_cd.cd_name; 1854 cf->cf_unit = unit; 1855 cf->cf_fstate = FSTATE_STAR; 1856 1857 dev = config_attach_pseudo(cf); 1858 if (dev == NULL) { 1859 printf("raid%d: config_attach_pseudo failed\n", 1860 raidPtr->raidid); 1861 free(cf, M_RAIDFRAME); 1862 return; 1863 } 1864 1865 /* provide a backpointer to the real softc */ 1866 raidsoftc(dev) = rs; 1867 1868 /* disk_attach actually creates space for the CPU disklabel, among 1869 * other things, so it's critical to call this *BEFORE* we try putzing 1870 * with disklabels. */ 1871 dk_init(dksc, dev, DKTYPE_RAID); 1872 disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver); 1873 1874 /* XXX There may be a weird interaction here between this, and 1875 * protectedSectors, as used in RAIDframe. 
	 */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usuable */
	rs->sc_flags |= RAIDF_INITED;

	dkwedge_discover(&dksc->sc_dkdev);
}

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device were requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return retcode;
}
#endif

/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* drop the lock across the label update, then retake it
		 * to decrement the counter */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
		    RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		printf("raid%d: raidstart not ready\n", raidPtr->raidid);
		return;
	}

	/* dk_start() pulls bufs off the queue and feeds them to
	 * raid_diskstart() */
	dk_start(dksc, NULL);
}

/*
 * Map one buf onto the array and hand it to rf_DoAccess().
 * Returns EAGAIN when no openings are available (the buf stays
 * queued), ENOSPC for an access past the end of the set or one that
 * is not a whole number of sectors.
 */
static int
raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	daddr_t blocknum;
	int rc;

	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->openings == 0) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EAGAIN;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	blocknum = bp->b_rawblkno;

	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
	    (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to...
	 * but DO NOT TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	/* pb: one extra block for a partial trailing sector */
	pb = (bp->b_bcount & raidPtr->sectorMask) ?
	    1 : 0;
	sum = raid_addr + num_blocks + pb;
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
		    (int) raid_addr, (int) sum, (int) num_blocks,
		    (int) pb, (int) bp->b_resid));
	}
	/* reject accesses past the end of the set, or whose arithmetic
	 * above wrapped around */
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		rc = ENOSPC;
		goto done;
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	if (bp->b_bcount & raidPtr->sectorMask) {
		/* not a whole number of sectors */
		rc = ENOSPC;
		goto done;
	}
	db1_printf(("Calling DoAccess..\n"));


	/* consume an opening; raiddone() gives it back */
	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings--;
	rf_unlock_mutex2(raidPtr->mutex);

	/* don't ever condition on bp->b_flags & B_WRITE.
	 * always condition on B_READ instead */

	rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
	    raid_addr, num_blocks,
	    bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

done:
	return rc;
}

/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */

int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it...
		   GO */
		printf("%s: WAKEUP CALLED\n", __func__);
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* complete the NOP immediately through the normal
		 * completion path */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
			    (long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
		    req->type, queue->raidPtr->raidid,
		    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
		    (int) req->sectorOffset, (int) req->numSector,
		    (int) (req->numSector <<
		    queue->raidPtr->logBytesPerSector),
		    (int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return 0;
}
/* this is the callback function associated with a I/O invoked from
   kernel code.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* the request was stashed in b_private by InitBP()/dispatch */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_optimal) ||
		    (queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_used_spare)) &&
		    (queue->raidPtr->numFailures <
		    queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			    queue->raidPtr->raidid,
			    bp->b_error,
			    queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue...
	 */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}


/*
 * initialize a buf structure for doing an I/O in the kernel.
 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
    RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
    void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector)
{
	bp->b_flags = rw_flag | (bp->b_flags & rf_b_pass);
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	/* convert sector number to DEV_BSIZE units for b_blkno */
	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}

/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 * (Hmm... where have we seen this warning before :->  GO )
 */
static int
raidlock(struct raid_softc *rs)
{
	int error;

	error = 0;
	mutex_enter(&rs->sc_mutex);
	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
		rs->sc_flags |= RAIDF_WANTED;
		/* cv_wait_sig so a signal can interrupt the wait */
		error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
		if (error != 0)
			goto done;
	}
	rs->sc_flags |= RAIDF_LOCKED;
done:
	mutex_exit(&rs->sc_mutex);
	return error;
}
/*
 * Unlock and wake up any waiters.
 */
static void
raidunlock(struct raid_softc *rs)
{

	mutex_enter(&rs->sc_mutex);
	rs->sc_flags &= ~RAIDF_LOCKED;
	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
		rs->sc_flags &= ~RAIDF_WANTED;
		cv_broadcast(&rs->sc_cv);
	}
	mutex_exit(&rs->sc_mutex);
}


#define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
#define RF_PARITY_MAP_SIZE   RF_PARITYMAP_NBYTE

/* Byte offset of the component label area on each component. */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}

/*
 * Size in bytes of the component label area: at least
 * RF_COMPONENT_INFO_SIZE, rounded up to a full sector.
 */
static daddr_t
rf_component_info_size(unsigned secsize)
{
	daddr_t info_size;

	KASSERT(secsize);
	if (secsize > RF_COMPONENT_INFO_SIZE)
		info_size = secsize;
	else
		info_size = RF_COMPONENT_INFO_SIZE;

	return info_size;
}

/*
 * Byte offset of the parity map: immediately after the component
 * label area (which is sector-size aligned like the size above).
 */
static daddr_t
rf_parity_map_offset(RF_Raid_t *raidPtr)
{
	daddr_t map_offset;

	KASSERT(raidPtr->bytesPerSector);
	if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
		map_offset = raidPtr->bytesPerSector;
	else
		map_offset = RF_COMPONENT_INFO_SIZE;
	map_offset += rf_component_info_offset();

	return map_offset;
}

/* Size in bytes of the parity map area, rounded up to a full sector. */
static daddr_t
rf_parity_map_size(RF_Raid_t *raidPtr)
{
	daddr_t map_size;

	if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
		map_size = raidPtr->bytesPerSector;
	else
		map_size = RF_PARITY_MAP_SIZE;

	return map_size;
}

/* Mark the component label for column col clean and write it out. */
int
raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *clabel;

	clabel = raidget_component_label(raidPtr, col);
	clabel->clean = RF_RAID_CLEAN;
	raidflush_component_label(raidPtr, col);
	return(0);
}


/* Mark the component label for column col dirty and write it out. */
int
raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *clabel;

	clabel = raidget_component_label(raidPtr, col);
	clabel->clean = RF_RAID_DIRTY;
	raidflush_component_label(raidPtr, col);
	return(0);
}

/* Read the on-disk label for column col into the in-core copy. */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);

	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}

/* Return a pointer to the in-core component label for column col. */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}

/*
 * Copy the current mod_counter into the in-core label for column col
 * and write the label out to the component.
 */
int
raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *label;

	label = &raidPtr->raid_cinfo[col].ci_label;
	label->mod_counter = raidPtr->mod_counter;
#ifndef RF_NO_PARITY_MAP
	label->parity_map_modcount = label->mod_counter;
#endif
	return raidwrite_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp, label);
}

/*
 * Swap the label endianness.
 *
 * Everything in the component label is 4-byte-swapped except the version,
 * which is kept in the byte-swapped version at all times, and indicates
 * for the writer that a swap is necessary.
 *
 * For reads it is expected that out_label == clabel, but writes expect
 * separate labels so only the re-swapped label is written out to disk,
 * leaving the swapped-except-version internally.
 *
 * Only support swapping label version 2.
 */
static void
rf_swap_label(RF_ComponentLabel_t *clabel, RF_ComponentLabel_t *out_label)
{
	int *in, *out, *in_last;

	KASSERT(clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION));

	/* Don't swap the label, but do copy it. */
	out_label->version = clabel->version;

	/* Swap every 32-bit word from serial_number up to future_use2[42]. */
	in = &clabel->serial_number;
	in_last = &clabel->future_use2[42];
	out = &out_label->serial_number;

	for (; in < in_last; in++, out++)
		*out = bswap32(*in);
}

/*
 * Read a component label from dev/b_vp into clabel; if it carries the
 * byte-swapped version magic, swap it in place so the in-core copy is
 * in host order (except the version field, see rf_swap_label()).
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	int error;

	error = raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));

	if (error == 0 &&
	    clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) {
		rf_swap_label(clabel, clabel);
	}

	return error;
}

/*
 * Read msize bytes from the reserved area at byte offset `offset'
 * (allocating a dsize-byte buffer for the transfer) into data.
 */
/* ARGSUSED */
static int
raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize)
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_READ;
	bp->b_resid = dsize;

	bdev_strategy(bp);
	error = biowait(bp);

	if (!error) {
		memcpy(data, bp->b_data, msize);
	}

	brelse(bp, 0);
	return(error);
}

/*
 * Write clabel to the component label area on dev.  If the in-core
 * label carries the byte-swapped version magic, write a re-swapped
 * copy so the on-disk image stays in its original byte order.
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	RF_ComponentLabel_t *clabel_write = clabel;
	RF_ComponentLabel_t lclabel;
	int error;

	if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) {
		/* Swap into a local copy; the in-core label is untouched. */
		clabel_write = &lclabel;
		rf_swap_label(clabel, clabel_write);
	}
	error = raidwrite_component_area(dev, b_vp, clabel_write,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);

	return error;
}

/*
 * Write msize bytes of data (zero-padded to dsize) to the reserved
 * area at byte offset `offset'.  If asyncp is set the write is issued
 * B_ASYNC and this returns immediately without waiting for completion
 * (note the buffer is then released by the I/O system, not here).
 */
/* ARGSUSED */
static int
raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
	bp->b_resid = dsize;

	memset(bp->b_data, 0, dsize);
	memcpy(bp->b_data, data, msize);

	bdev_strategy(bp);
	if (asyncp)
		return 0;
	error = biowait(bp);
	brelse(bp, 0);
	if (error) {
#if 1
		printf("Failed to write RAID component info!\n");
#endif
	}

	return(error);
}

/* Write the on-disk parity map to every live component. */
void
rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	int c;

	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		/* XXXjld: what if an error occurs here? */
		raidwrite_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, map,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr), 0);
	}
}

/*
 * Read the parity map from every live component, merging the
 * per-component copies into a single map via rf_paritymap_merge().
 */
void
rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	struct rf_paritymap_ondisk tmp;
	int c,first;

	first=1;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		raidread_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr));
		if (first) {
			memcpy(map, &tmp, sizeof(*map));
			first = 0;
		} else {
			rf_paritymap_merge(map, &tmp);
		}
	}
}

/*
 * Bump the mod counter and mark every live component (and every
 * in-use spare) dirty on disk.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which column this spare is standing in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}


/*
 * Refresh the labels of all optimal components (and in-use spares):
 * bump the mod counter, note the configured unit, and - on a final
 * update with clean parity - mark components clean.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}

/*
 * Close a component's vnode.  Auto-configured components were opened
 * with VOP_OPEN directly, so are closed the same way; others were
 * opened via vn_open and go back through vn_close.
 */
void
rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
{

	if (vp != NULL) {
		if (auto_configured == 1) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

		} else {
			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
		}
	}
}


/* Close and forget the vnodes of all components and spares. */
void
rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
{
	int r,c;
	struct vnode *vp;
	int acd;


	/* We take this opportunity to close the vnodes like we should.. */

	for (c = 0; c < raidPtr->numCol; c++) {
		vp = raidPtr->raid_cinfo[c].ci_vp;
		acd = raidPtr->Disks[c].auto_configured;
		rf_close_component(raidPtr, vp, acd);
		raidPtr->raid_cinfo[c].ci_vp = NULL;
		raidPtr->Disks[c].auto_configured = 0;
	}

	for (r = 0; r < raidPtr->numSpare; r++) {
		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
		rf_close_component(raidPtr, vp, acd);
		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
	}
}


/*
 * Kernel thread body: fail the requested component and (optionally)
 * reconstruct to a spare.  Frees req and exits the thread when done.
 */
static void
rf_ReconThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	if (req->flags & RF_FDFLAGS_RECON_FORCE) {
		raidPtr->forceRecon = 1;
	}

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
	    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	if (req->flags & RF_FDFLAGS_RECON_FORCE) {
		raidPtr->forceRecon = 0;
	}

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}

/*
 * Kernel thread body: rewrite all parity, then set parity_good on
 * success and wake anyone waiting in shutdown.  Exits the thread.
 */
static void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		rf_lock_mutex2(raidPtr->rad_lock);
		cv_broadcast(&raidPtr->parity_rewrite_cv);
		rf_unlock_mutex2(raidPtr->rad_lock);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}


/* Kernel thread body: copy reconstructed data back, then exit. */
static void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}


/*
 * Kernel thread body: reconstruct a component in place (onto the
 * same device).  Frees req and exits the thread when done.
 */
static void
rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;

	if (req->flags & RF_FDFLAGS_RECON_FORCE) {
		raidPtr->forceRecon = 1;
	}

	rf_ReconstructInPlace(raidPtr, req->col);

	if (req->flags & RF_FDFLAGS_RECON_FORCE) {
		raidPtr->forceRecon = 0;
	}

	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}

/*
 * Try to read a component label from vp.  If it is present and
 * reasonable, prepend a new RF_AutoConfig_t for it to ac_list
 * (keeping vp open); otherwise free the label and close/release vp.
 * Returns the (possibly updated) list head.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_WAITOK);

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_WAITOK);
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}

/*
 * Scan all disk devices in the system for RAID components, returning
 * a list of RF_AutoConfig_t for every component label found.
 */
static RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		    dv = deviter_next(&di)) {

			/* we are only interested in disks */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CDs. */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md. */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* we don't care about spiflash */
			if (device_is_a(dv, "spiflash")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /*No raid partition as yet*/

			/* get a vnode for the raw partition of this disk */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares."  Continue looking
				   for something that exists*/
				vput(vp);
				continue;
			}

			VOP_UNLOCK(vp);
			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /*There is a raid component on this disk*/
				continue;
			}

			/* Ok, the disk exists.  Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more.  We'll allocate it again
			   a little later if we really do... */
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			rf_part_found = 0; /*No raid partitions yet*/
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Not quite a 'whatever'.  In
					 * this situation we know
					 * there is a FS_RAID
					 * partition, but we can't
					 * open it.  The most likely
					 * reason is that the
					 * partition is already in
					 * use by another RAID set.
					 * So note that we've already
					 * found a partition on this
					 * disk so we don't attempt
					 * to use the raw disk later. */
					rf_part_found = 1;
					vput(vp);
					continue;
				}
				VOP_UNLOCK(vp);
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
				    label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
			}

			/*
			 *If there is no raid component on this disk, either in a
			 *disklabel or inside a wedge, check the raw partition as well,
			 *as it is possible to configure raid components on raw disk
			 *devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				VOP_UNLOCK(vp);
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
				    label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}

/*
 * Sanity-check a component label: version, clean flag, and row/column
 * geometry must all be plausible.  Returns 1 if the label looks valid
 * (after fixing old-label garbage via rf_fix_old_label_size() when
 * numsecs is supplied), 0 otherwise.
 */
int
rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
{

	if ((clabel->version==RF_COMPONENT_LABEL_VERSION_1 ||
	    clabel->version==RF_COMPONENT_LABEL_VERSION ||
	    clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) &&
	    (clabel->clean == RF_RAID_CLEAN ||
	    clabel->clean == RF_RAID_DIRTY) &&
	    clabel->row >=0 &&
	    clabel->column >= 0 &&
	    clabel->num_rows > 0 &&
	    clabel->num_columns > 0 &&
	    clabel->row < clabel->num_rows &&
	    clabel->column < clabel->num_columns &&
	    clabel->blockSize > 0 &&
	    /*
	     * numBlocksHi may contain garbage, but it is ok since
	     * the type is unsigned. If it is really garbage,
	     * rf_fix_old_label_size() will fix it.
	     */
	    rf_component_label_numblocks(clabel) > 0) {
		/*
		 * label looks reasonable enough...
		 * let's make sure it has no old garbage.
		 */
		if (numsecs)
			rf_fix_old_label_size(clabel, numsecs);
		return(1);
	}
	return(0);
}


/*
 * For reasons yet unknown, some old component labels have garbage in
 * the newer numBlocksHi region, and this causes lossage.  Since those
 * disks will also have numsecs set to less than 32 bits of sectors,
 * we can determine when this corruption has occurred, and fix it.
 *
 * The exact same problem, with the same unknown reason, happens to
 * the partitionSizeHi member as well.
 */
static void
rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
{

	/* Only applies when the whole device fits in 32 bits of sectors. */
	if (numsecs < ((uint64_t)1 << 32)) {
		if (clabel->numBlocksHi) {
			printf("WARNING: total sectors < 32 bits, yet "
			    "numBlocksHi set\n"
			    "WARNING: resetting numBlocksHi to zero.\n");
			clabel->numBlocksHi = 0;
		}

		if (clabel->partitionSizeHi) {
			printf("WARNING: total sectors < 32 bits, yet "
			    "partitionSizeHi set\n"
			    "WARNING: resetting partitionSizeHi to zero.\n");
			clabel->partitionSizeHi = 0;
		}
	}
}


#ifdef DEBUG
/* Dump a component label to the console (debug builds only). */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf(" Clean: %s Status: %d\n",
	    clabel->clean ? "Yes" : "No", clabel->status);
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n",
	    (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf(" Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf(" Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif

}
#endif

/*
 * Partition the discovered components into config sets: each set
 * groups components whose labels agree per rf_does_it_fit().
 */
static RF_ConfigSet_t *
rf_create_auto_sets(RF_AutoConfig_t *ac_list)
{
	RF_AutoConfig_t *ac;
	RF_ConfigSet_t *config_sets;
	RF_ConfigSet_t *cset;
	RF_AutoConfig_t *ac_next;


	config_sets = NULL;

	/* Go through the AutoConfig list, and figure out which components
	   belong to what sets. */
	ac = ac_list;
	while(ac!=NULL) {
		/* we're going to putz with ac->next, so save it here
		   for use at the end of the loop */
		ac_next = ac->next;

		if (config_sets == NULL) {
			/* will need at least this one... */
			config_sets = malloc(sizeof(RF_ConfigSet_t),
			    M_RAIDFRAME, M_WAITOK);
			/* this one is easy :) */
			config_sets->ac = ac;
			config_sets->next = NULL;
			config_sets->rootable = 0;
			ac->next = NULL;
		} else {
			/* which set does this component fit into? */
			cset = config_sets;
			while(cset!=NULL) {
				if (rf_does_it_fit(cset, ac)) {
					/* looks like it matches... */
					ac->next = cset->ac;
					cset->ac = ac;
					break;
				}
				cset = cset->next;
			}
			if (cset==NULL) {
				/* didn't find a match above... new set..*/
				cset = malloc(sizeof(RF_ConfigSet_t),
				    M_RAIDFRAME, M_WAITOK);
				cset->ac = ac;
				ac->next = NULL;
				cset->next = config_sets;
				cset->rootable = 0;
				config_sets = cset;
			}
		}
		ac = ac_next;
	}


	return(config_sets);
}

/*
 * Decide whether component ac belongs to config set cset by comparing
 * its label against the set's first member.  Returns 1 on a match.
 */
static int
rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
{
	RF_ComponentLabel_t *clabel1, *clabel2;

	/* If this one matches the *first* one in the set, that's good
	   enough, since the other members of the set would have been
	   through here too... */
	/* note that we are not checking partitionSize here..

	   Note that we are also not checking the mod_counters here.
	   If everything else matches except the mod_counter, that's
	   good enough for this test.  We will deal with the mod_counters
	   a little later in the autoconfiguration process.

	    (clabel1->mod_counter == clabel2->mod_counter) &&

	   The reason we don't check for this is that failed disks
	   will have lower modification counts.  If those disks are
	   not added to the set they used to belong to, then they will
	   form their own set, which may result in 2 different sets,
	   for example, competing to be configured at raid0, and
	   perhaps competing to be the root filesystem set.  If the
	   wrong ones get configured, or both attempt to become /,
	   weird behaviour and or serious lossage will occur.  Thus we
	   need to bring them into the fold here, and kick them out at
	   a later point.

	*/

	clabel1 = cset->ac->clabel;
	clabel2 = ac->clabel;
	if ((clabel1->version == clabel2->version) &&
	    (clabel1->serial_number == clabel2->serial_number) &&
	    (clabel1->num_rows == clabel2->num_rows) &&
	    (clabel1->num_columns == clabel2->num_columns) &&
	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
	    (clabel1->parityConfig == clabel2->parityConfig) &&
	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
	    (clabel1->blockSize == clabel2->blockSize) &&
	    rf_component_label_numblocks(clabel1) ==
	    rf_component_label_numblocks(clabel2) &&
	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
	    (clabel1->root_partition == clabel2->root_partition) &&
	    (clabel1->last_unit == clabel2->last_unit) &&
	    (clabel1->config_order == clabel2->config_order)) {
		/* if it get's here, it almost *has* to be a match */
	} else {
		/* it's not consistent with somebody in the set..
		   punt */
		return(0);
	}
	/* all was fine.. it must fit... */
	return(1);
}

/*
 * Check whether a config set has enough live components (at the
 * newest mod_counter) to be configured.  Returns 1 if configurable.
 */
static int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				    ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an even component, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}

/*
 * Build an RF_Config_t for an autoconfigured set from the component
 * labels: geometry and layout from the first label, device names from
 * every component in the chain.
 */
static void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
    RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ?? */

	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		    ac->devname);
		ac = ac->next;
	}

	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}

/*
 * Set the autoconfigure flag on the set and on every component
 * label (including in-use spares), flushing the labels to disk.
 * Returns new_value.
 */
static int
rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
{
	RF_ComponentLabel_t *clabel;
	int column;
	int sparecol;

	raidPtr->autoconfigure = new_value;

	for(column=0; column<raidPtr->numCol; column++) {
		if (raidPtr->Disks[column].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->autoconfigure = new_value;
			raidflush_component_label(raidPtr, column);
		}
	}
	for(column = 0; column < raidPtr->numSpare ; column++) {
		sparecol = raidPtr->numCol + column;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			clabel = raidget_component_label(raidPtr, sparecol);
			clabel->autoconfigure = new_value;
			raidflush_component_label(raidPtr, sparecol);
		}
	}
	return(new_value);
}

/*
 * Set the root_partition flag on the set and on every component
 * label (including in-use spares), flushing the labels to disk.
 * Returns new_value.
 */
static int
rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
{
	RF_ComponentLabel_t *clabel;
	int column;
	int sparecol;

	raidPtr->root_partition = new_value;
	for(column=0; column<raidPtr->numCol; column++) {
		if (raidPtr->Disks[column].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->root_partition = new_value;
			raidflush_component_label(raidPtr, column);
		}
	}
	for(column = 0; column < raidPtr->numSpare ; column++) {
		sparecol = raidPtr->numCol + column;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			clabel = raidget_component_label(raidPtr, sparecol);
			clabel->root_partition = new_value;
			raidflush_component_label(raidPtr, sparecol);
		}
	}
	return(new_value);
}

/*
 * Close and release the vnode of every component in the config set.
 * Each vnode is locked, VOP_CLOSE'd, and returned with vput(); the
 * ac->vp pointer is cleared so it cannot be used again.
 */
static void
rf_release_all_vps(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;

	ac = cset->ac;
	while (ac != NULL) {
		/* Close the vp, and give it back */
		if (ac->vp) {
			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
			vput(ac->vp);
			ac->vp = NULL;
		}
		ac = ac->next;
	}
}


/*
 * Free all memory associated with a config set: each auto-config
 * entry's component label, the entry itself, and finally the set.
 * Does NOT release vnodes -- see rf_release_all_vps() above.
 */
static void
rf_cleanup_config_set(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *next_ac;

	ac = cset->ac;
	while (ac != NULL) {
		next_ac = ac->next;
		/* nuke the label */
		free(ac->clabel, M_RAIDFRAME);
		/* cleanup the config structure */
		free(ac, M_RAIDFRAME);
		/* "next.." */
		ac = next_ac;
	}
	/* and, finally, nuke the config set */
	free(cset, M_RAIDFRAME);
}


/*
 * Fill in a component label from the current in-core state of the
 * RAID set: serial number, mod counter, geometry, layout parameters,
 * and the autoconfig/root/ordering properties.  The caller supplies
 * the label (typically obtained via raidget_component_label()) and is
 * responsible for flushing it to disk.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* avoid over-writing byteswapped version. */
	if (clabel->version != bswap32(RF_COMPONENT_LABEL_VERSION))
		clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}

/*
 * Configure one auto-detected RAID set.  Builds an RF_Config_t from
 * the config set, picks a unit number (preferring the last_unit
 * recorded in the component label, else the next free unit), runs
 * rf_Configure(), and on success marks the set dirty and records its
 * root-partition eligibility.  Returns the softc on success, or NULL
 * if configuration failed (in which case the softc is released).
 */
static struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_WAITOK|M_ZERO);

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	/* Walk forward until we find a unit that is not already valid. */
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n", raidID);
#endif

	if (sc == NULL)
		sc = raidget(raidID, true);
	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* Configuration failed; give the unit back. */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}

/*
 * Initialize a pool for this RAID set.  The wait-channel name buffer
 * w_chan (at least RF_MAX_POOLNAMELEN bytes, owned by the caller and
 * referenced by the pool afterwards) is formatted as "raid%d_<name>".
 * The pool is primed with xmin items and capped at xmax.
 */
void
rf_pool_init(RF_Raid_t *raidPtr, char *w_chan, struct pool *p, size_t size, const char *pool_name,
    size_t xmin, size_t xmax)
{

	/* Format: raid%d_foo */
	snprintf(w_chan, RF_MAX_POOLNAMELEN, "raid%d_%s", raidPtr->raidid, pool_name);

	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
}


/*
 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
 * to see if there is IO pending and if that IO could possibly be done
 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
 * otherwise.
3668 * 3669 */ 3670 int 3671 rf_buf_queue_check(RF_Raid_t *raidPtr) 3672 { 3673 struct raid_softc *rs; 3674 struct dk_softc *dksc; 3675 3676 rs = raidPtr->softc; 3677 dksc = &rs->sc_dksc; 3678 3679 if ((rs->sc_flags & RAIDF_INITED) == 0) 3680 return 1; 3681 3682 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) { 3683 /* there is work to do */ 3684 return 0; 3685 } 3686 /* default is nothing to do */ 3687 return 1; 3688 } 3689 3690 int 3691 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr) 3692 { 3693 uint64_t numsecs; 3694 unsigned secsize; 3695 int error; 3696 3697 error = getdisksize(vp, &numsecs, &secsize); 3698 if (error == 0) { 3699 diskPtr->blockSize = secsize; 3700 diskPtr->numBlocks = numsecs - rf_protectedSectors; 3701 diskPtr->partitionSize = numsecs; 3702 return 0; 3703 } 3704 return error; 3705 } 3706 3707 static int 3708 raid_match(device_t self, cfdata_t cfdata, void *aux) 3709 { 3710 return 1; 3711 } 3712 3713 static void 3714 raid_attach(device_t parent, device_t self, void *aux) 3715 { 3716 } 3717 3718 3719 static int 3720 raid_detach(device_t self, int flags) 3721 { 3722 int error; 3723 struct raid_softc *rs = raidsoftc(self); 3724 3725 if (rs == NULL) 3726 return ENXIO; 3727 3728 if ((error = raidlock(rs)) != 0) 3729 return error; 3730 3731 error = raid_detach_unlocked(rs); 3732 3733 raidunlock(rs); 3734 3735 /* XXX raid can be referenced here */ 3736 3737 if (error) 3738 return error; 3739 3740 /* Free the softc */ 3741 raidput(rs); 3742 3743 return 0; 3744 } 3745 3746 static void 3747 rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr) 3748 { 3749 struct dk_softc *dksc = &rs->sc_dksc; 3750 struct disk_geom *dg = &dksc->sc_dkdev.dk_geom; 3751 3752 memset(dg, 0, sizeof(*dg)); 3753 3754 dg->dg_secperunit = raidPtr->totalSectors; 3755 dg->dg_secsize = raidPtr->bytesPerSector; 3756 dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe; 3757 dg->dg_ntracks = 4 * raidPtr->numCol; 3758 3759 disk_set_info(dksc->sc_dev, 
&dksc->sc_dkdev, NULL); 3760 } 3761 3762 /* 3763 * Get cache info for all the components (including spares). 3764 * Returns intersection of all the cache flags of all disks, or first 3765 * error if any encountered. 3766 * XXXfua feature flags can change as spares are added - lock down somehow 3767 */ 3768 static int 3769 rf_get_component_caches(RF_Raid_t *raidPtr, int *data) 3770 { 3771 int c; 3772 int error; 3773 int dkwhole = 0, dkpart; 3774 3775 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) { 3776 /* 3777 * Check any non-dead disk, even when currently being 3778 * reconstructed. 3779 */ 3780 if (!RF_DEAD_DISK(raidPtr->Disks[c].status) 3781 || raidPtr->Disks[c].status == rf_ds_reconstructing) { 3782 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, 3783 DIOCGCACHE, &dkpart, FREAD, NOCRED); 3784 if (error) { 3785 if (error != ENODEV) { 3786 printf("raid%d: get cache for component %s failed\n", 3787 raidPtr->raidid, 3788 raidPtr->Disks[c].devname); 3789 } 3790 3791 return error; 3792 } 3793 3794 if (c == 0) 3795 dkwhole = dkpart; 3796 else 3797 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart); 3798 } 3799 } 3800 3801 *data = dkwhole; 3802 3803 return 0; 3804 } 3805 3806 /* 3807 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components. 3808 * We end up returning whatever error was returned by the first cache flush 3809 * that fails. 
 */

/*
 * Flush the write cache of a single component via DIOCCACHESYNC.
 * Retries up to five times, logging each failure.  Returns 0 on
 * success, ENODEV if the component does not support the ioctl, or
 * the last error after the retries are exhausted.
 */
static int
rf_sync_component_cache(RF_Raid_t *raidPtr, int c, int force)
{
	int e = 0;
	for (int i = 0; i < 5; i++) {
		e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
		    &force, FWRITE, NOCRED);
		if (!e || e == ENODEV)
			return e;
		printf("raid%d: cache flush[%d] to component %s failed (%d)\n",
		    raidPtr->raidid, i, raidPtr->Disks[c].devname, e);
	}
	return e;
}

/*
 * Forward a cache flush to every optimal column and every in-use
 * spare.  All components are attempted even after a failure; the
 * first error encountered is the one returned.
 */
int
rf_sync_component_caches(RF_Raid_t *raidPtr, int force)
{
	int c, error;

	error = 0;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			int e = rf_sync_component_cache(raidPtr, c, force);
			if (e && !error)
				error = e;
		}
	}

	for (c = 0; c < raidPtr->numSpare ; c++) {
		int sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			int e = rf_sync_component_cache(raidPtr, sparecol,
			    force);
			if (e && !error)
				error = e;
		}
	}
	return error;
}

/* Fill in info with the current status */
void
rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
{

	memset(info, 0, sizeof(*info));

	/* When no reconstruction is running, report 100% complete. */
	if (raidPtr->status != rf_rs_reconstructing) {
		info->total = 100;
		info->completed = 100;
	} else {
		info->total = raidPtr->reconControl->numRUsTotal;
		info->completed = raidPtr->reconControl->numRUsComplete;
	}
	info->remaining = info->total - info->completed;
}

/* Fill in info with the current status */
void
rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
{

	memset(info, 0, sizeof(*info));

	if (raidPtr->parity_rewrite_in_progress == 1) {
		/* Progress is measured in stripes. */
		info->total = raidPtr->Layout.numStripe;
		info->completed = raidPtr->parity_rewrite_stripes_done;
	} else {
		/* Not running: report 100% complete. */
		info->completed = 100;
		info->total = 100;
	}
	info->remaining = info->total - info->completed;
}

/* Fill in info with the current status */
void
rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
{

	memset(info, 0, sizeof(*info));

	if (raidPtr->copyback_in_progress == 1) {
		/* Progress is measured in stripes. */
		info->total = raidPtr->Layout.numStripe;
		info->completed = raidPtr->copyback_stripes_done;
		info->remaining = info->total - info->completed;
	} else {
		/* Not running: report 100% complete. */
		info->remaining = 0;
		info->completed = 100;
		info->total = 100;
	}
}

/* Fill in config with the current info */
int
rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
{
	int d, i, j;

	if (!raidPtr->valid)
		return ENODEV;
	config->cols = raidPtr->numCol;
	config->ndevs = raidPtr->numCol;
	/* The devs/spares arrays hold at most RF_MAX_DISKS entries. */
	if (config->ndevs >= RF_MAX_DISKS)
		return ENOMEM;
	config->nspares = raidPtr->numSpare;
	if (config->nspares >= RF_MAX_DISKS)
		return ENOMEM;
	config->maxqdepth = raidPtr->maxQueueDepth;
	d = 0;
	/* Copy out the data columns... */
	for (j = 0; j < config->cols; j++) {
		config->devs[d] = raidPtr->Disks[j];
		d++;
	}
	/* ...followed by the spares, which live after the columns. */
	for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
		config->spares[i] = raidPtr->Disks[j];
		if (config->spares[i].status == rf_ds_rebuilding_spare) {
			/* XXX: raidctl(8) expects to see this as a used spare */
			config->spares[i].status = rf_ds_used_spare;
		}
	}
	return 0;
}

/*
 * Copy the component label for the column named in the caller's
 * buffer back into that buffer.  The column index is validated
 * against the number of columns plus spares.
 */
int
rf_get_component_label(RF_Raid_t *raidPtr, void *data)
{
	RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
	RF_ComponentLabel_t *raid_clabel;
	int column = clabel->column;

	if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
		return EINVAL;
	raid_clabel = raidget_component_label(raidPtr, column);
	memcpy(clabel, raid_clabel, sizeof *clabel);
	/* Fix-up for userland. */
	if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION))
		clabel->version = RF_COMPONENT_LABEL_VERSION;

	return 0;
}

/*
 * Module interface
 */

MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");

#ifdef _MODULE
CFDRIVER_DECL(raid, DV_DISK, NULL);
#endif

static int raid_modcmd(modcmd_t, void *);
static int raid_modcmd_init(void);
static int raid_modcmd_fini(void);

/* module(9) command dispatcher for the raid module. */
static int
raid_modcmd(modcmd_t cmd, void *data)
{
	int error;

	error = 0;
	switch (cmd) {
	case MODULE_CMD_INIT:
		error = raid_modcmd_init();
		break;
	case MODULE_CMD_FINI:
		error = raid_modcmd_fini();
		break;
	default:
		error = ENOTTY;
		break;
	}
	return error;
}

/*
 * Module initialization: attach the block/character devsw, the
 * cfdriver (module builds only) and the cfattach, boot RAIDframe,
 * and register a finalizer that auto-configures RAID sets once all
 * real hardware has been found.  On any attach failure the steps
 * already performed are rolled back in reverse order.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1 lets devsw_attach pick the majors. */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* error is necessarily 0 here; all failure paths returned above. */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		/* Non-fatal: continue without autoconfiguration. */
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}

/*
 * Module finalization: refuse to unload while any raid device
 * exists, then detach the cfattach, cfdriver (module builds only)
 * and devsw, shut RAIDframe down, and destroy the module-global
 * locks.  Teardown is the reverse of raid_modcmd_init(); a failed
 * cfdriver detach re-attaches the cfattach to restore a sane state.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist. */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* Roll back so the module remains usable. */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	devsw_detach(&raid_bdevsw, &raid_cdevsw);
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}