/*	$NetBSD: rf_netbsdkintf.c,v 1.401 2021/09/09 23:26:37 riastradh Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Greg Oster; Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: cd.c 1.6 90/11/28$
 *
 *	@(#)cd.c	8.2 (Berkeley) 11/16/93
 */

/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Mark Holland, Jim Zelenka
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/***********************************************************
 *
 * rf_kintf.c -- the kernel interface routines for RAIDframe
 *
 ***********************************************************/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.401 2021/09/09 23:26:37 riastradh Exp $");

#ifdef _KERNEL_OPT
#include "opt_raid_autoconfig.h"
#include "opt_compat_netbsd32.h"
#endif

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/disk.h>
#include <sys/device.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/disklabel.h>
#include <sys/conf.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/reboot.h>
#include <sys/kauth.h>
#include <sys/module.h>
#include <sys/compat_stub.h>

#include <prop/proplib.h>

#include <dev/raidframe/raidframevar.h>
#include <dev/raidframe/raidframeio.h>
#include <dev/raidframe/rf_paritymap.h>

#include "rf_raid.h"
#include "rf_copyback.h"
#include "rf_dag.h"
#include "rf_dagflags.h"
#include "rf_desc.h"
#include "rf_diskqueue.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_kintf.h"
#include "rf_options.h"
#include "rf_driver.h"
#include "rf_parityscan.h"
#include "rf_threadstuff.h"

#include "ioconf.h"

#ifdef DEBUG
int	rf_kdebug_level = 0;
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#else				/* DEBUG */
#define db1_printf(a) { }
#endif				/* DEBUG */
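/*
 * Illustrative note (not from the original source): db1_printf takes its
 * printf-style argument list wrapped in an extra set of parentheses, so the
 * whole call can pass through a single macro parameter, e.g.:
 *
 *	db1_printf(("raid%d: unit %d opened\n", raidid, unit));
 *
 * With DEBUG defined and rf_kdebug_level > 0 this expands to a printf(9)
 * call; otherwise it compiles away to nothing.
 */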
#ifdef DEBUG_ROOT
#define DPRINTF(a, ...) printf(a, __VA_ARGS__)
#else
#define DPRINTF(a, ...)
#endif

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
static rf_declare_mutex2(rf_sparet_wait_mutex);
static rf_declare_cond2(rf_sparet_wait_cv);
static rf_declare_cond2(rf_sparet_resp_cv);

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */
#endif

const int rf_b_pass = (B_PHYS|B_RAW|B_MEDIA_FLAGS);

MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");

/* prototypes */
static void KernelWakeupFunc(struct buf *);
static void InitBP(struct buf *, struct vnode *, unsigned,
    dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    void *, int);
static void raidinit(struct raid_softc *);
static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);

static int raid_match(device_t, cfdata_t, void *);
static void raid_attach(device_t, device_t, void *);
static int raid_detach(device_t, int);

static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t);
static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t, int);

static int raidwrite_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);
static int raidread_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);

static int raid_diskstart(device_t, struct buf *bp);
static int raid_dumpblocks(device_t, void *, daddr_t, int);
static int raid_lastclose(device_t);

static dev_type_open(raidopen);
static dev_type_close(raidclose);
static dev_type_read(raidread);
static dev_type_write(raidwrite);
static dev_type_ioctl(raidioctl);
static dev_type_strategy(raidstrategy);
static dev_type_dump(raiddump);
static dev_type_size(raidsize);

const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,
	.d_dumpblocks = raid_dumpblocks,
	.d_lastclose = raid_lastclose,
	.d_minphys = minphys
};

#define	raidunit(x)	DISKUNIT(x)
#define	raidsoftc(dev)	(((struct raid_softc *)device_private(dev))->sc_r.softc)

extern struct cfdriver raid_cd;
CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
    raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

/* Internal representation of a rf_recon_req */
struct rf_recon_req_internal {
	RF_RowCol_t col;
	RF_ReconReqFlags_t flags;
	void   *raidPtr;
};

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif
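/*
 * Worked example (illustrative only, using the figures from the comment
 * above): with the default RAIDOUTSTANDING of 6, the worst-case transient
 * allocation for full-stripe writes is roughly
 *
 *	6 requests * 192K/request = 1152K of scratch buffers,
 *
 * in addition to 6 * 64K = 384K of incoming data.  Raising RAIDOUTSTANDING
 * scales this bound linearly.
 */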
#define	RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */

static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static int raid_detach_unlocked(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);

static void rf_ReconThread(struct rf_recon_req_internal *);
static void rf_RewriteParityThread(RF_Raid_t *raidPtr);
static void rf_CopybackThread(RF_Raid_t *raidPtr);
static void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
static int rf_autoconfig(device_t);
static int rf_rescan(void);
static void rf_buildroothack(RF_ConfigSet_t *);

static RF_AutoConfig_t *rf_find_raid_components(void);
static RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *, RF_AutoConfig_t *);
static void rf_create_configuration(RF_AutoConfig_t *, RF_Config_t *, RF_Raid_t *);
static int rf_set_autoconfig(RF_Raid_t *, int);
static int rf_set_rootpartition(RF_Raid_t *, int);
static void rf_release_all_vps(RF_ConfigSet_t *);
static void rf_cleanup_config_set(RF_ConfigSet_t *);
static int rf_have_enough_components(RF_ConfigSet_t *);
static struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);

/*
 * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
 * Note that this is overridden by having RAID_AUTOCONFIG as an option
 * in the kernel config file.
 */
#ifdef RAID_AUTOCONFIG
int raidautoconfig = 1;
#else
int raidautoconfig = 0;
#endif
static bool raidautoconfigdone = false;

struct pool rf_alloclist_pool;	/* AllocList */

static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
static kmutex_t raid_lock;

static struct raid_softc *
raidcreate(int unit) {
	struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
	sc->sc_unit = unit;
	cv_init(&sc->sc_cv, "raidunit");
	mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
	return sc;
}

static void
raiddestroy(struct raid_softc *sc) {
	cv_destroy(&sc->sc_cv);
	mutex_destroy(&sc->sc_mutex);
	kmem_free(sc, sizeof(*sc));
}

static struct raid_softc *
raidget(int unit, bool create) {
	struct raid_softc *sc;
	if (unit < 0) {
#ifdef DIAGNOSTIC
		panic("%s: unit %d!", __func__, unit);
#endif
		return NULL;
	}
	mutex_enter(&raid_lock);
	LIST_FOREACH(sc, &raids, sc_link) {
		if (sc->sc_unit == unit) {
			mutex_exit(&raid_lock);
			return sc;
		}
	}
	mutex_exit(&raid_lock);
	if (!create)
		return NULL;
	sc = raidcreate(unit);
	mutex_enter(&raid_lock);
	LIST_INSERT_HEAD(&raids, sc, sc_link);
	mutex_exit(&raid_lock);
	return sc;
}

static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}

void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}

static int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return 0;

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}

int
rf_inited(const struct raid_softc *rs) {
	return (rs->sc_flags & RAIDF_INITED) != 0;
}

RF_Raid_t *
rf_get_raid(struct raid_softc *rs) {
	return &rs->sc_r;
}

int
rf_get_unit(const struct raid_softc *rs) {
	return rs->sc_unit;
}
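/*
 * Usage note (illustrative, not from the original source): raidget() is the
 * lookup-or-create entry point for per-unit softc state.  Callers that only
 * query an existing unit pass create=false and treat NULL as ENXIO, e.g.:
 *
 *	if ((rs = raidget(unit, false)) == NULL)
 *		return ENXIO;
 *
 * whereas raidopen() passes create=true, so opening /dev/raidN for the
 * first time allocates the unit on demand.
 */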
static int
rf_containsboot(RF_Raid_t *r, device_t bdv) {
	const char *bootname;
	size_t len;

	/* if bdv is NULL, the set can't contain it. exit early. */
	if (bdv == NULL)
		return 0;

	bootname = device_xname(bdv);
	len = strlen(bootname);

	for (int col = 0; col < r->numCol; col++) {
		const char *devname = r->Disks[col].devname;
		devname += sizeof("/dev/") - 1;
		if (strncmp(devname, "dk", 2) == 0) {
			const char *parent =
			    dkwedge_get_parent_name(r->Disks[col].dev);
			if (parent != NULL)
				devname = parent;
		}
		if (strncmp(devname, bootname, len) == 0) {
			struct raid_softc *sc = r->softc;
			aprint_debug("raid%d includes boot device %s\n",
			    sc->sc_unit, devname);
			return 1;
		}
	}
	return 0;
}

static int
rf_rescan(void)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets, *cset, *next_cset;
	struct raid_softc *sc;
	int raid_added;

	ac_list = rf_find_raid_components();
	config_sets = rf_create_auto_sets(ac_list);

	raid_added = 1;
	while (raid_added > 0) {
		raid_added = 0;
		cset = config_sets;
		while (cset != NULL) {
			next_cset = cset->next;
			if (rf_have_enough_components(cset) &&
			    cset->ac->clabel->autoconfigure == 1) {
				sc = rf_auto_config_set(cset);
				if (sc != NULL) {
					aprint_debug("raid%d: configured ok, rootable %d\n",
					    sc->sc_unit, cset->rootable);
					/* We added one RAID set */
					raid_added++;
				} else {
					/* The autoconfig didn't work :( */
					aprint_debug("Autoconfig failed\n");
					rf_release_all_vps(cset);
				}
			} else {
				/* we're not autoconfiguring this set...
				   release the associated resources */
				rf_release_all_vps(cset);
			}
			/* cleanup */
			rf_cleanup_config_set(cset);
			cset = next_cset;
		}
		if (raid_added > 0) {
			/* We added at least one RAID set, so re-scan for recursive RAID */
			ac_list = rf_find_raid_components();
			config_sets = rf_create_auto_sets(ac_list);
		}
	}

	return 0;
}
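/*
 * Illustrative note (not from the original source): the outer loop above
 * exists to handle stacked ("recursive") RAID.  For example, if raid1 is
 * built from components that live on raid0, the first scan can only find
 * and configure raid0; only once raid0's block device exists can a second
 * pass discover raid1's components.  The loop terminates when a full scan
 * configures nothing new.
 */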
static void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;
	int raid_added;
	struct raid_softc *sc, *rsc;
	struct dk_softc *dksc = NULL;	/* XXX gcc -Os: may be used uninit. */

	sc = rsc = NULL;
	num_root = 0;

	raid_added = 1;
	while (raid_added > 0) {
		raid_added = 0;
		cset = config_sets;
		while (cset != NULL) {
			next_cset = cset->next;
			if (rf_have_enough_components(cset) &&
			    cset->ac->clabel->autoconfigure == 1) {
				sc = rf_auto_config_set(cset);
				if (sc != NULL) {
					aprint_debug("raid%d: configured ok, rootable %d\n",
					    sc->sc_unit, cset->rootable);
					/* We added one RAID set */
					raid_added++;
					if (cset->rootable) {
						rsc = sc;
						num_root++;
					}
				} else {
					/* The autoconfig didn't work :( */
					aprint_debug("Autoconfig failed\n");
					rf_release_all_vps(cset);
				}
			} else {
				/* we're not autoconfiguring this set...
				   release the associated resources */
				rf_release_all_vps(cset);
			}
			/* cleanup */
			rf_cleanup_config_set(cset);
			cset = next_cset;
		}
		if (raid_added > 0) {
			/* We added at least one RAID set, so re-scan for recursive RAID */
			ac_list = rf_find_raid_components();
			config_sets = rf_create_auto_sets(ac_list);
		}
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL) {
		DPRINTF("%s: rootspec %s\n", __func__, rootspec);
		return;
	}

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		dksc = &rsc->sc_dksc;
		if (dksc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume partition 'a' first */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dksc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
			DPRINTF("%s: candidate wedge root=%s\n", __func__,
			    cname);
			if (candidate_root == NULL) {
				/*
				 * If that is not found, because we don't use
				 * disklabel, return the first dk child
				 * XXX: we can skip the 'a' check above
				 * and always do this...
				 */
				size_t i = 0;
				candidate_root = dkwedge_find_by_parent(
				    device_xname(dksc->sc_dev), &i);
			}
			DPRINTF("%s: candidate wedge root=%p\n", __func__,
			    candidate_root);
		} else
			candidate_root = dksc->sc_dev;
		DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
		DPRINTF("%s: booted_device=%p root_partition=%d "
		    "contains_boot=%d",
		    __func__, booted_device, rsc->sc_r.root_partition,
		    rf_containsboot(&rsc->sc_r, booted_device));
		/* XXX the check for booted_device == NULL can probably be
		 * dropped, now that rf_containsboot handles that case.
		 */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_method = "raidframe/single";
			booted_partition = 0;	/* XXX assume 'a' */
			DPRINTF("%s: set booted_device=%s(%p)\n", __func__,
			    device_xname(booted_device), booted_device);
		}
	} else if (num_root > 1) {
		DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
		    booted_device);

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
				dksc = &rsc->sc_dksc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = dksc->sc_dev;
			booted_method = "raidframe/multi";
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}

static int
raidsize(dev_t dev)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;
	unsigned int unit;

	unit = raidunit(dev);
	if ((rs = raidget(unit, false)) == NULL)
		return -1;
	dksc = &rs->sc_dksc;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return -1;

	return dk_size(dksc, dev);
}

static int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	unsigned int unit;
	struct raid_softc *rs;
	struct dk_softc *dksc;

	unit = raidunit(dev);
	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENODEV;

	/*
	   Note that blkno is relative to this particular partition.
	   By adding RF_PROTECTED_SECTORS, we get a value that is
	   relative to the partition used for the underlying component.
	*/
	blkno += RF_PROTECTED_SECTORS;

	return dk_dump(dksc, dev, blkno, va, size, DK_DUMP_RECURSIVE);
}

static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the first component
	   2) a used_spare of the first component
	   3) the second component
	   4) a used_spare of the second component
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live component.
	   If we didn't find a live component, we now check to see
	   if there is a relevant spared component.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			for (j = 0; j < raidPtr->numCol; j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared first
				   component!  We'll take that over
				   anything else found so far.  (We
				   couldn't have found a real first
				   component before, since this is a
				   used spare, and it's saying that
				   it's replacing the first
				   component.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the first
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared second component.
				   We'll dump to that if we haven't found
				   anything else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!? */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}

/* ARGSUSED */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	int error = 0;
	int part, pmask;

	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return error;

	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	dksc = &rs->sc_dksc;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (!DK_BUSY(dksc, pmask) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}

	if ((rs->sc_flags & RAIDF_INITED) != 0)
		error = dk_open(dksc, dev, flags, fmt, l);

bad:
	raidunlock(rs);

	return error;

}

static int
raid_lastclose(device_t self)
{
	struct raid_softc *rs = raidsoftc(self);

	/* Last one... device is not unconfigured yet.
	   Device shutdown has taken care of setting the
	   clean bits if RAIDF_INITED is not set;
	   mark things as clean... */

	rf_update_component_labels(&rs->sc_r,
	    RF_FINAL_COMPONENT_UPDATE);

	/* pass to unlocked code */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		rs->sc_flags |= RAIDF_DETACH;

	return 0;
}

/* ARGSUSED */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	cfdata_t cf;
	int error = 0, do_detach = 0, do_put = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((error = raidlock(rs)) != 0)
		return error;

	if ((rs->sc_flags & RAIDF_INITED) != 0) {
		error = dk_close(dksc, dev, flags, fmt, l);
		if ((rs->sc_flags & RAIDF_DETACH) != 0)
			do_detach = 1;
	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		do_put = 1;

	raidunlock(rs);

	if (do_detach) {
		/* free the pseudo device attach bits */
		cf = device_cfdata(dksc->sc_dev);
		error = config_detach(dksc->sc_dev, 0);
		if (error == 0)
			free(cf, M_RAIDFRAME);
	} else if (do_put) {
		raidput(rs);
	}

	return error;

}

static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}

static void
raidstrategy(struct buf *bp)
{
	unsigned int unit;
	struct raid_softc *rs;
	struct dk_softc *dksc;
	RF_Raid_t *raidPtr;

	unit = raidunit(bp->b_dev);
	if ((rs = raidget(unit, false)) == NULL) {
		bp->b_error = ENXIO;
		goto fail;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto fail;
	}
	dksc = &rs->sc_dksc;
	raidPtr = &rs->sc_r;

	/* Queue IO only */
	if (dk_strategy_defer(dksc, bp))
		goto done;

	/* schedule the IO to happen at the next convenient time */
	raid_wakeup(raidPtr);

done:
	return;

fail:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}

static int
raid_diskstart(device_t dev, struct buf *bp)
{
	struct raid_softc *rs = raidsoftc(dev);
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		db1_printf(("raid is not valid..\n"));
		return ENODEV;
	}

	/* XXX */
	bp->b_resid = 0;

	return raiddoaccess(raidPtr, bp);
}

void
raiddone(RF_Raid_t *raidPtr, struct buf *bp)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;

	dk_done(dksc, bp);

	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings++;
	rf_unlock_mutex2(raidPtr->mutex);

	/* schedule more IO */
	raid_wakeup(raidPtr);
}

/* ARGSUSED */
static int
raidread(dev_t dev, struct uio *uio, int flags)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	return physio(raidstrategy, NULL, dev, B_READ, minphys, uio);

}

/* ARGSUSED */
static int
raidwrite(dev_t dev, struct uio *uio, int flags)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	return physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio);

}
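/*
 * Illustrative note (not from the original source): raidread() and
 * raidwrite() serve the raw character device.  physio(9) carves the uio
 * into minphys-bounded buffers and pushes each one through raidstrategy(),
 * so raw and block I/O share a single code path.  For example, reading the
 * raw device (the exact partition letter varies by port):
 *
 *	dd if=/dev/rraid0d of=/dev/null bs=64k	# exercises raidread()
 */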
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}

static bool
rf_must_be_initialized(const struct raid_softc *rs, u_long cmd)
{
	switch (cmd) {
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_SET_ROOT:
		return (rs->sc_flags & RAIDF_INITED) == 0;
	}
	return false;
}

int
rf_fail_disk(RF_Raid_t *raidPtr, struct rf_recon_req *rr)
{
	struct rf_recon_req_internal *rrint;

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (rr->col < 0 || rr->col >= raidPtr->numCol) {
		/* bad column */
		return EINVAL;
	}

	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->status == rf_rs_reconstructing) {
		/* you can't fail a disk while we're reconstructing! */
		/* XXX wrong for RAID6 */
		goto out;
	}
	if ((raidPtr->Disks[rr->col].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* some other component has failed.  Let's not make
		   things worse. XXX wrong for RAID6 */
		goto out;
	}
	if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
		/* Can't fail a spared disk! */
		goto out;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	/* make a copy of the recon request so that we don't rely on
	 * the user's buffer */
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return (ENOMEM);
	rrint->col = rr->col;
	rrint->flags = rr->flags;
	rrint->raidPtr = raidPtr;

	return RF_CREATE_THREAD(raidPtr->recon_thread, rf_ReconThread,
	    rrint, "raid_recon");
out:
	rf_unlock_mutex2(raidPtr->mutex);
	return EINVAL;
}

static int
rf_copyinspecificbuf(RF_Config_t *k_cfg)
{
	/* allocate a buffer for the layout-specific data, and copy it in */
	if (k_cfg->layoutSpecificSize == 0)
		return 0;

	if (k_cfg->layoutSpecificSize > 10000) {
		/* sanity check */
		return EINVAL;
	}

	u_char *specific_buf;
	specific_buf = RF_Malloc(k_cfg->layoutSpecificSize);
	if (specific_buf == NULL)
		return ENOMEM;

	int retcode = copyin(k_cfg->layoutSpecific, specific_buf,
	    k_cfg->layoutSpecificSize);
	if (retcode) {
		RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		db1_printf(("%s: retcode=%d copyin.2\n", __func__, retcode));
		return retcode;
	}

	k_cfg->layoutSpecific = specific_buf;
	return 0;
}

static int
rf_getConfiguration(struct raid_softc *rs, void *data, RF_Config_t **k_cfg)
{
	RF_Config_t *u_cfg = *((RF_Config_t **) data);

	if (rs->sc_r.valid) {
		/* There is a valid RAID set running on this unit! */
		printf("raid%d: Device already configured!\n", rs->sc_unit);
		return EINVAL;
	}

	/* copy-in the configuration information */
	/* data points to a pointer to the configuration structure */
	*k_cfg = RF_Malloc(sizeof(**k_cfg));
	if (*k_cfg == NULL) {
		return ENOMEM;
	}
	int retcode = copyin(u_cfg, *k_cfg, sizeof(RF_Config_t));
	if (retcode == 0)
		return 0;
	RF_Free(*k_cfg, sizeof(RF_Config_t));
	db1_printf(("%s: retcode=%d copyin.1\n", __func__, retcode));
	rs->sc_flags |= RAIDF_SHUTDOWN;
	return retcode;
}

int
rf_construct(struct raid_softc *rs, RF_Config_t *k_cfg)
{
	int retcode;
	RF_Raid_t *raidPtr = &rs->sc_r;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((retcode = rf_copyinspecificbuf(k_cfg)) != 0)
		goto out;

	/* should do some kind of sanity check on the configuration.
	 * Store the sum of all the bytes in the last byte? */

	/* configure the system */

	/*
	 * Clear the entire RAID descriptor, just to make sure
	 * there is no stale data left in the case of a
	 * reconfiguration
	 */
	memset(raidPtr, 0, sizeof(*raidPtr));
	raidPtr->softc = rs;
	raidPtr->raidid = rs->sc_unit;

	retcode = rf_Configure(raidPtr, k_cfg, NULL);

	if (retcode == 0) {
		/* allow this many simultaneous IO's to
		   this RAID device */
		raidPtr->openings = RAIDOUTSTANDING;

		raidinit(rs);
		raid_wakeup(raidPtr);
		rf_markalldirty(raidPtr);
	}

	/* free the buffers.  No return code here. */
	if (k_cfg->layoutSpecificSize) {
		RF_Free(k_cfg->layoutSpecific, k_cfg->layoutSpecificSize);
	}
out:
	RF_Free(k_cfg, sizeof(RF_Config_t));
	if (retcode) {
		/*
		 * If configuration failed, set sc_flags so that we
		 * will detach the device when we close it.
		 */
		rs->sc_flags |= RAIDF_SHUTDOWN;
	}
	return retcode;
}
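/*
 * Illustrative userland sketch (not part of this driver): the
 * RAIDFRAME_CONFIGURE ioctl passes a *pointer* to an RF_Config_t, which is
 * why rf_getConfiguration() dereferences data once before calling copyin().
 * A minimal caller looks roughly like this:
 *
 *	RF_Config_t cfg;
 *	RF_Config_t *cfgp = &cfg;
 *
 *	memset(&cfg, 0, sizeof(cfg));
 *	// ... fill in numCol, devnames, parityConfig, etc. ...
 *	if (ioctl(fd, RAIDFRAME_CONFIGURE, &cfgp) == -1)
 *		err(1, "RAIDFRAME_CONFIGURE");
 */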
#if RF_DISABLED
static int
rf_set_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{

	/* XXX check the label for valid stuff... */
	/* Note that some things *should not* get modified --
	   the user should be re-initing the labels instead of
	   trying to patch things.
	   */
#ifdef DEBUG
	int raidid = raidPtr->raidid;
	printf("raid%d: Got component label:\n", raidid);
	printf("raid%d: Version: %d\n", raidid, clabel->version);
	printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
	printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
	printf("raid%d: Column: %d\n", raidid, clabel->column);
	printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
	printf("raid%d: Clean: %d\n", raidid, clabel->clean);
	printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif	/* DEBUG */
	clabel->row = 0;
	int column = clabel->column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return (EINVAL);
	}

	/* XXX this isn't allowed to do anything for now :-) */

	/* XXX and before it is, we need to fill in the rest
	   of the fields!?!?!?! */
	memcpy(raidget_component_label(raidPtr, column),
	    clabel, sizeof(*clabel));
	raidflush_component_label(raidPtr, column);
	return 0;
}
#endif

static int
rf_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/*
	   we only want the serial number from the above.
	   We get all the rest of the information from the
	   config that was used to create this RAID set.
	*/

	raidPtr->serial_number = clabel->serial_number;

	for (int column = 0; column < raidPtr->numCol; column++) {
		RF_RaidDisk_t *diskPtr = &raidPtr->Disks[column];
		if (RF_DEAD_DISK(diskPtr->status))
			continue;
		RF_ComponentLabel_t *ci_label = raidget_component_label(
		    raidPtr, column);
		/* Zeroing this is important. */
		memset(ci_label, 0, sizeof(*ci_label));
		raid_init_component_label(raidPtr, ci_label);
		ci_label->serial_number = raidPtr->serial_number;
		ci_label->row = 0; /* we don't pretend to support more */
		rf_component_label_set_partitionsize(ci_label,
		    diskPtr->partitionSize);
		ci_label->column = column;
		raidflush_component_label(raidPtr, column);
		/* XXXjld what about the spares? */
	}

	return 0;
}

static int
rf_rebuild_in_place(RF_Raid_t *raidPtr, RF_SingleComponent_t *componentPtr)
{

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (raidPtr->recon_in_progress == 1) {
		/* a reconstruct is already in progress! */
		return EINVAL;
	}

	RF_SingleComponent_t component;
	memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
	component.row = 0; /* we don't support any more */
	int column = component.column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return EINVAL;
	}

	rf_lock_mutex2(raidPtr->mutex);
	if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* XXX 0 above shouldn't be constant!!! */
		/* some component other than this has failed.
		   Let's not make things worse than they already
		   are... */
		printf("raid%d: Unable to reconstruct to disk at:\n",
		    raidPtr->raidid);
		printf("raid%d: Col: %d Too many failures.\n",
		    raidPtr->raidid, column);
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_reconstructing) {
		printf("raid%d: Unable to reconstruct to disk at:\n",
		    raidPtr->raidid);
		printf("raid%d: Col: %d "
		    "Reconstruction already occurring!\n",
		    raidPtr->raidid, column);

		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_spared) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	rf_unlock_mutex2(raidPtr->mutex);

	struct rf_recon_req_internal *rrint;
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return ENOMEM;

	rrint->col = column;
	rrint->raidPtr = raidPtr;

	return RF_CREATE_THREAD(raidPtr->recon_thread,
	    rf_ReconstructInPlaceThread, rrint, "raid_reconip");
}

static int
rf_check_recon_status(RF_Raid_t *raidPtr, int *data)
{
	/*
	 * This makes no sense on a RAID 0, or if we are not reconstructing
	 * so tell the user it's done.
	 */
	if (raidPtr->Layout.map->faultsTolerated == 0 ||
	    raidPtr->status != rf_rs_reconstructing) {
		*data = 100;
		return 0;
	}
	if (raidPtr->reconControl->numRUsTotal == 0) {
		*data = 0;
		return 0;
	}
	*data = (raidPtr->reconControl->numRUsComplete * 100
	    / raidPtr->reconControl->numRUsTotal);
	return 0;
}

static int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = raidunit(dev);
	int part, pmask;
	struct raid_softc *rs;
	struct dk_softc *dksc;
	RF_Config_t *k_cfg;
	RF_Raid_t *raidPtr;
	RF_AccTotals_t *totals;
	RF_SingleComponent_t component;
	RF_DeviceConfig_t *d_cfg, *ucfgp;
	int retcode = 0;
	int column;
	RF_ComponentLabel_t *clabel;
	RF_SingleComponent_t *sparePtr, *componentPtr;
	int d;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	dksc = &rs->sc_dksc;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
	    (int) DISKPART(dev), (int) unit, cmd));

	/* Must be initialized for these... */
	if (rf_must_be_initialized(rs, cmd))
		return ENXIO;

	switch (cmd) {
		/* configure the system */
	case RAIDFRAME_CONFIGURE:
		if ((retcode = rf_getConfiguration(rs, data, &k_cfg)) != 0)
			return retcode;
		return rf_construct(rs, k_cfg);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((retcode = raidlock(rs)) != 0)
			return retcode;

		if (DK_BUSY(dksc, pmask) ||
		    raidPtr->recon_in_progress != 0 ||
		    raidPtr->parity_rewrite_in_progress != 0 ||
		    raidPtr->copyback_in_progress != 0)
			retcode = EBUSY;
		else {
			/* detach and free on close */
			rs->sc_flags |= RAIDF_SHUTDOWN;
			retcode = 0;
		}

		raidunlock(rs);

		return retcode;
	case RAIDFRAME_GET_COMPONENT_LABEL:
		return rf_get_component_label(raidPtr, data);

#if RF_DISABLED
	case RAIDFRAME_SET_COMPONENT_LABEL:
		return rf_set_component_label(raidPtr, data);
#endif

	case RAIDFRAME_INIT_LABELS:
		return rf_init_component_label(raidPtr, data);

	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return retcode;

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return retcode;

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return 0;
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return EINVAL;
		}

		return RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
		    rf_RewriteParityThread, raidPtr, "raid_parity");

	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy(&component, sparePtr, sizeof(RF_SingleComponent_t));
		return rf_add_hot_spare(raidPtr, &component);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		return retcode;

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
		return rf_delete_component(raidPtr, &component);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
		return rf_incorporate_hot_spare(raidPtr, &component);

	case RAIDFRAME_REBUILD_IN_PLACE:
		return rf_rebuild_in_place(raidPtr, data);

	case RAIDFRAME_GET_INFO:
		ucfgp = *(RF_DeviceConfig_t **)data;
		d_cfg = RF_Malloc(sizeof(*d_cfg));
		if (d_cfg == NULL)
			return ENOMEM;
		retcode = rf_get_info(raidPtr, d_cfg);
		if (retcode == 0) {
			retcode = copyout(d_cfg, ucfgp, sizeof(*d_cfg));
		}
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
		return retcode;

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return 0;

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map, data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (rf_paritymap_set_params(raidPtr->parity_map, data, 1) != 0)
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESCAN:
		return rf_rescan();

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return 0;

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return 0;

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return 0;

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return 0;

	case RAIDFRAME_FAIL_DISK:
		return rf_fail_disk(raidPtr, data);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return EINVAL;
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return EINVAL;
		}

		return RF_CREATE_THREAD(raidPtr->copyback_thread,
		    rf_CopybackThread, raidPtr, "raid_copyback");

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		return rf_check_recon_status(raidPtr, data);

	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		rf_check_recon_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return 0;
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
			    raidPtr->parity_rewrite_stripes_done /
			    raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return 0;

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		rf_check_parityrewrite_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return 0;
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
			    raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return 0;

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		rf_check_copyback_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_SET_LAST_UNIT:
		for (column = 0; column < raidPtr->numCol; column++)
			if (raidPtr->Disks[column].status != rf_ds_optimal)
				return EBUSY;

		for (column = 0; column < raidPtr->numCol; column++) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->last_unit = *(int *)data;
			raidflush_component_label(raidPtr, column);
		}
		rs->sc_cflags |= RAIDF_UNIT_CHANGED;
		return 0;

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table.  this ioctl does not return until a
		 * spare table is needed.  XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table -- XXX */
#if RF_DISABLED
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		RF_SparetWait_t *waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return 0;

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		waitreq = RF_Malloc(sizeof(*waitreq));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return 0;

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		waitreq = RF_Malloc(sizeof(*waitreq));
		waitreq->fcol = retcode;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_resp_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		return retcode;
#endif
	default:
		/*
		 * Don't bother trying to load compat modules
		 * if it is not our ioctl. This is more efficient
		 * and makes rump tests not depend on compat code
		 */
		if (IOCGROUP(cmd) != 'r')
			break;
#ifdef _LP64
		if ((l->l_proc->p_flag & PK_32) != 0) {
			module_autoload("compat_netbsd32_raid",
			    MODULE_CLASS_EXEC);
			MODULE_HOOK_CALL(raidframe_netbsd32_ioctl_hook,
			    (rs, cmd, data), enosys(), retcode);
			if (retcode != EPASSTHROUGH)
				return retcode;
		}
#endif
		module_autoload("compat_raid_80", MODULE_CLASS_EXEC);
		MODULE_HOOK_CALL(raidframe_ioctl_80_hook,
		    (rs, cmd, data), enosys(), retcode);
		if (retcode != EPASSTHROUGH)
			return retcode;

		module_autoload("compat_raid_50", MODULE_CLASS_EXEC);
		MODULE_HOOK_CALL(raidframe_ioctl_50_hook,
		    (rs, cmd, data), enosys(), retcode);
		if (retcode != EPASSTHROUGH)
			return retcode;
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return EINVAL;

	/*
	 * Add support for "regular" device ioctls here.
	 */

	switch (cmd) {
	case DIOCGCACHE:
		retcode = rf_get_component_caches(raidPtr, (int *)data);
		break;

	case DIOCCACHESYNC:
		retcode = rf_sync_component_caches(raidPtr, *(int *)data);
		break;

	default:
		retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
		break;
	}

	return retcode;

}
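/*
 * Illustrative note (not from the original source): the compat hooks in the
 * default case above follow the kernel's EPASSTHROUGH convention -- a hook
 * that does not recognize cmd returns EPASSTHROUGH rather than an error, so
 * control falls through: first to any other compat module, and finally to
 * the generic dk_ioctl() handler at the end of raidioctl().
 */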
*/ 1841 1842 1843 static void 1844 raidinit(struct raid_softc *rs) 1845 { 1846 cfdata_t cf; 1847 unsigned int unit; 1848 struct dk_softc *dksc = &rs->sc_dksc; 1849 RF_Raid_t *raidPtr = &rs->sc_r; 1850 device_t dev; 1851 1852 unit = raidPtr->raidid; 1853 1854 /* XXX doesn't check bounds. */ 1855 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit); 1856 1857 /* attach the pseudo device */ 1858 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK); 1859 cf->cf_name = raid_cd.cd_name; 1860 cf->cf_atname = raid_cd.cd_name; 1861 cf->cf_unit = unit; 1862 cf->cf_fstate = FSTATE_STAR; 1863 1864 dev = config_attach_pseudo(cf); 1865 if (dev == NULL) { 1866 printf("raid%d: config_attach_pseudo failed\n", 1867 raidPtr->raidid); 1868 free(cf, M_RAIDFRAME); 1869 return; 1870 } 1871 1872 /* provide a backpointer to the real softc */ 1873 raidsoftc(dev) = rs; 1874 1875 /* disk_attach actually creates space for the CPU disklabel, among 1876 * other things, so it's critical to call this *BEFORE* we try putzing 1877 * with disklabels. */ 1878 dk_init(dksc, dev, DKTYPE_RAID); 1879 disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver); 1880 1881 /* XXX There may be a weird interaction here between this, and 1882 * protectedSectors, as used in RAIDframe. */ 1883 1884 rs->sc_size = raidPtr->totalSectors; 1885 1886 /* Attach dk and disk subsystems */ 1887 dk_attach(dksc); 1888 disk_attach(&dksc->sc_dkdev); 1889 rf_set_geometry(rs, raidPtr); 1890 1891 bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK); 1892 1893 /* mark unit as usuable */ 1894 rs->sc_flags |= RAIDF_INITED; 1895 1896 dkwedge_discover(&dksc->sc_dkdev); 1897 } 1898 1899 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 1900 /* wake up the daemon & tell it to get us a spare table 1901 * XXX 1902 * the entries in the queues should be tagged with the raidPtr 1903 * so that in the extremely rare case that two recons happen at once, 1904 * we know for which device were requesting a spare table 1905 * XXX 1906 * 1907 * XXX This code is not currently used. GO 1908 */ 1909 int 1910 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req) 1911 { 1912 int retcode; 1913 1914 rf_lock_mutex2(rf_sparet_wait_mutex); 1915 req->next = rf_sparet_wait_queue; 1916 rf_sparet_wait_queue = req; 1917 rf_broadcast_cond2(rf_sparet_wait_cv); 1918 1919 /* mpsleep unlocks the mutex */ 1920 while (!rf_sparet_resp_queue) { 1921 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex); 1922 } 1923 req = rf_sparet_resp_queue; 1924 rf_sparet_resp_queue = req->next; 1925 rf_unlock_mutex2(rf_sparet_wait_mutex); 1926 1927 retcode = req->fcol; 1928 RF_Free(req, sizeof(*req)); /* this is not the same req as we 1929 * alloc'd */ 1930 return retcode; 1931 } 1932 #endif 1933 1934 /* a wrapper around rf_DoAccess that extracts appropriate info from the 1935 * bp & passes it down. 
1936 * any calls originating in the kernel must use non-blocking I/O 1937 * do some extra sanity checking to return "appropriate" error values for 1938 * certain conditions (to make some standard utilities work) 1939 * 1940 * Formerly known as: rf_DoAccessKernel 1941 */ 1942 void 1943 raidstart(RF_Raid_t *raidPtr) 1944 { 1945 struct raid_softc *rs; 1946 struct dk_softc *dksc; 1947 1948 rs = raidPtr->softc; 1949 dksc = &rs->sc_dksc; 1950 /* quick check to see if anything has died recently */ 1951 rf_lock_mutex2(raidPtr->mutex); 1952 if (raidPtr->numNewFailures > 0) { 1953 rf_unlock_mutex2(raidPtr->mutex); 1954 rf_update_component_labels(raidPtr, 1955 RF_NORMAL_COMPONENT_UPDATE); 1956 rf_lock_mutex2(raidPtr->mutex); 1957 raidPtr->numNewFailures--; 1958 } 1959 rf_unlock_mutex2(raidPtr->mutex); 1960 1961 if ((rs->sc_flags & RAIDF_INITED) == 0) { 1962 printf("raid%d: raidstart not ready\n", raidPtr->raidid); 1963 return; 1964 } 1965 1966 dk_start(dksc, NULL); 1967 } 1968 1969 static int 1970 raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp) 1971 { 1972 RF_SectorCount_t num_blocks, pb, sum; 1973 RF_RaidAddr_t raid_addr; 1974 daddr_t blocknum; 1975 int rc; 1976 1977 rf_lock_mutex2(raidPtr->mutex); 1978 if (raidPtr->openings == 0) { 1979 rf_unlock_mutex2(raidPtr->mutex); 1980 return EAGAIN; 1981 } 1982 rf_unlock_mutex2(raidPtr->mutex); 1983 1984 blocknum = bp->b_rawblkno; 1985 1986 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, 1987 (int) blocknum)); 1988 1989 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount)); 1990 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid)); 1991 1992 /* *THIS* is where we adjust what block we're going to... 1993 * but DO NOT TOUCH bp->b_blkno!!! */ 1994 raid_addr = blocknum; 1995 1996 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector; 1997 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0; 1998 sum = raid_addr + num_blocks + pb; 1999 if (1 || rf_debugKernelAccess) { 2000 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n", 2001 (int) raid_addr, (int) sum, (int) num_blocks, 2002 (int) pb, (int) bp->b_resid)); 2003 } 2004 if ((sum > raidPtr->totalSectors) || (sum < raid_addr) 2005 || (sum < num_blocks) || (sum < pb)) { 2006 rc = ENOSPC; 2007 goto done; 2008 } 2009 /* 2010 * XXX rf_DoAccess() should do this, not just DoAccessKernel() 2011 */ 2012 2013 if (bp->b_bcount & raidPtr->sectorMask) { 2014 rc = ENOSPC; 2015 goto done; 2016 } 2017 db1_printf(("Calling DoAccess..\n")); 2018 2019 2020 rf_lock_mutex2(raidPtr->mutex); 2021 raidPtr->openings--; 2022 rf_unlock_mutex2(raidPtr->mutex); 2023 2024 /* don't ever condition on bp->b_flags & B_WRITE. 2025 * always condition on B_READ instead */ 2026 2027 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ? 2028 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, 2029 raid_addr, num_blocks, 2030 bp->b_data, bp, RF_DAG_NONBLOCKING_IO); 2031 2032 done: 2033 return rc; 2034 } 2035 2036 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */ 2037 2038 int 2039 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req) 2040 { 2041 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE; 2042 struct buf *bp; 2043 2044 req->queue = queue; 2045 bp = req->bp; 2046 2047 switch (req->type) { 2048 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */ 2049 /* XXX need to do something extra here.. */ 2050 /* I'm leaving this in, as I've never actually seen it used, 2051 * and I'd like folks to report it... 
GO */ 2052 printf("%s: WAKEUP CALLED\n", __func__); 2053 queue->numOutstanding++; 2054 2055 bp->b_flags = 0; 2056 bp->b_private = req; 2057 2058 KernelWakeupFunc(bp); 2059 break; 2060 2061 case RF_IO_TYPE_READ: 2062 case RF_IO_TYPE_WRITE: 2063 #if RF_ACC_TRACE > 0 2064 if (req->tracerec) { 2065 RF_ETIMER_START(req->tracerec->timer); 2066 } 2067 #endif 2068 InitBP(bp, queue->rf_cinfo->ci_vp, 2069 op, queue->rf_cinfo->ci_dev, 2070 req->sectorOffset, req->numSector, 2071 req->buf, KernelWakeupFunc, (void *) req, 2072 queue->raidPtr->logBytesPerSector); 2073 2074 if (rf_debugKernelAccess) { 2075 db1_printf(("dispatch: bp->b_blkno = %ld\n", 2076 (long) bp->b_blkno)); 2077 } 2078 queue->numOutstanding++; 2079 queue->last_deq_sector = req->sectorOffset; 2080 /* acc wouldn't have been let in if there were any pending 2081 * reqs at any other priority */ 2082 queue->curPriority = req->priority; 2083 2084 db1_printf(("Going for %c to unit %d col %d\n", 2085 req->type, queue->raidPtr->raidid, 2086 queue->col)); 2087 db1_printf(("sector %d count %d (%d bytes) %d\n", 2088 (int) req->sectorOffset, (int) req->numSector, 2089 (int) (req->numSector << 2090 queue->raidPtr->logBytesPerSector), 2091 (int) queue->raidPtr->logBytesPerSector)); 2092 2093 /* 2094 * XXX: drop lock here since this can block at 2095 * least with backing SCSI devices. Retake it 2096 * to minimize fuss with calling interfaces. 2097 */ 2098 2099 RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam"); 2100 bdev_strategy(bp); 2101 RF_LOCK_QUEUE_MUTEX(queue, "unusedparam"); 2102 break; 2103 2104 default: 2105 panic("bad req->type in rf_DispatchKernelIO"); 2106 } 2107 db1_printf(("Exiting from DispatchKernelIO\n")); 2108 2109 return 0; 2110 } 2111 /* this is the callback function associated with an I/O invoked from 2112 kernel code. 2113 */ 2114 static void 2115 KernelWakeupFunc(struct buf *bp) 2116 { 2117 RF_DiskQueueData_t *req = NULL; 2118 RF_DiskQueue_t *queue; 2119 2120 db1_printf(("recovering the request queue:\n")); 2121 2122 req = bp->b_private; 2123 2124 queue = (RF_DiskQueue_t *) req->queue; 2125 2126 rf_lock_mutex2(queue->raidPtr->iodone_lock); 2127 2128 #if RF_ACC_TRACE > 0 2129 if (req->tracerec) { 2130 RF_ETIMER_STOP(req->tracerec->timer); 2131 RF_ETIMER_EVAL(req->tracerec->timer); 2132 rf_lock_mutex2(rf_tracing_mutex); 2133 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer); 2134 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer); 2135 req->tracerec->num_phys_ios++; 2136 rf_unlock_mutex2(rf_tracing_mutex); 2137 } 2138 #endif 2139 2140 /* XXX Ok, let's get aggressive... If b_error is set, let's go 2141 * ballistic, and mark the component as hosed... */ 2142 2143 if (bp->b_error != 0) { 2144 /* Mark the disk as dead */ 2145 /* but only mark it once... */ 2146 /* and only if it wouldn't leave this RAID set 2147 completely broken */ 2148 if (((queue->raidPtr->Disks[queue->col].status == 2149 rf_ds_optimal) || 2150 (queue->raidPtr->Disks[queue->col].status == 2151 rf_ds_used_spare)) && 2152 (queue->raidPtr->numFailures < 2153 queue->raidPtr->Layout.map->faultsTolerated)) { 2154 printf("raid%d: IO Error (%d). Marking %s as failed.\n", 2155 queue->raidPtr->raidid, 2156 bp->b_error, 2157 queue->raidPtr->Disks[queue->col].devname); 2158 queue->raidPtr->Disks[queue->col].status = 2159 rf_ds_failed; 2160 queue->raidPtr->status = rf_rs_degraded; 2161 queue->raidPtr->numFailures++; 2162 queue->raidPtr->numNewFailures++; 2163 } else { /* Disk is already dead...
*/ 2164 /* printf("Disk already marked as dead!\n"); */ 2165 } 2166 2167 } 2168 2169 /* Fill in the error value */ 2170 req->error = bp->b_error; 2171 2172 /* Drop this one on the "finished" queue... */ 2173 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries); 2174 2175 /* Let the raidio thread know there is work to be done. */ 2176 rf_signal_cond2(queue->raidPtr->iodone_cv); 2177 2178 rf_unlock_mutex2(queue->raidPtr->iodone_lock); 2179 } 2180 2181 2182 /* 2183 * initialize a buf structure for doing an I/O in the kernel. 2184 */ 2185 static void 2186 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev, 2187 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf, 2188 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector) 2189 { 2190 bp->b_flags = rw_flag | (bp->b_flags & rf_b_pass); 2191 bp->b_oflags = 0; 2192 bp->b_cflags = 0; 2193 bp->b_bcount = numSect << logBytesPerSector; 2194 bp->b_bufsize = bp->b_bcount; 2195 bp->b_error = 0; 2196 bp->b_dev = dev; 2197 bp->b_data = bf; 2198 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT; 2199 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */ 2200 if (bp->b_bcount == 0) { 2201 panic("bp->b_bcount is zero in InitBP!!"); 2202 } 2203 bp->b_iodone = cbFunc; 2204 bp->b_private = cbArg; 2205 } 2206 2207 /* 2208 * Wait interruptibly for an exclusive lock. 2209 * 2210 * XXX 2211 * Several drivers do this; it should be abstracted and made MP-safe. 2212 * (Hmm... where have we seen this warning before :-> GO ) 2213 */ 2214 static int 2215 raidlock(struct raid_softc *rs) 2216 { 2217 int error; 2218 2219 error = 0; 2220 mutex_enter(&rs->sc_mutex); 2221 while ((rs->sc_flags & RAIDF_LOCKED) != 0) { 2222 rs->sc_flags |= RAIDF_WANTED; 2223 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex); 2224 if (error != 0) 2225 goto done; 2226 } 2227 rs->sc_flags |= RAIDF_LOCKED; 2228 done: 2229 mutex_exit(&rs->sc_mutex); 2230 return error; 2231 } 2232 /* 2233 * Unlock and wake up any waiters. 
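 * This pairs with the cv_wait_sig() loop in raidlock() above: clearing
 * RAIDF_LOCKED and broadcasting sc_cv lets any RAIDF_WANTED waiters
 * re-check the flag.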
2234 */ 2235 static void 2236 raidunlock(struct raid_softc *rs) 2237 { 2238 2239 mutex_enter(&rs->sc_mutex); 2240 rs->sc_flags &= ~RAIDF_LOCKED; 2241 if ((rs->sc_flags & RAIDF_WANTED) != 0) { 2242 rs->sc_flags &= ~RAIDF_WANTED; 2243 cv_broadcast(&rs->sc_cv); 2244 } 2245 mutex_exit(&rs->sc_mutex); 2246 } 2247 2248 2249 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ 2250 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ 2251 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE 2252 2253 static daddr_t 2254 rf_component_info_offset(void) 2255 { 2256 2257 return RF_COMPONENT_INFO_OFFSET; 2258 } 2259 2260 static daddr_t 2261 rf_component_info_size(unsigned secsize) 2262 { 2263 daddr_t info_size; 2264 2265 KASSERT(secsize); 2266 if (secsize > RF_COMPONENT_INFO_SIZE) 2267 info_size = secsize; 2268 else 2269 info_size = RF_COMPONENT_INFO_SIZE; 2270 2271 return info_size; 2272 } 2273 2274 static daddr_t 2275 rf_parity_map_offset(RF_Raid_t *raidPtr) 2276 { 2277 daddr_t map_offset; 2278 2279 KASSERT(raidPtr->bytesPerSector); 2280 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE) 2281 map_offset = raidPtr->bytesPerSector; 2282 else 2283 map_offset = RF_COMPONENT_INFO_SIZE; 2284 map_offset += rf_component_info_offset(); 2285 2286 return map_offset; 2287 } 2288 2289 static daddr_t 2290 rf_parity_map_size(RF_Raid_t *raidPtr) 2291 { 2292 daddr_t map_size; 2293 2294 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE) 2295 map_size = raidPtr->bytesPerSector; 2296 else 2297 map_size = RF_PARITY_MAP_SIZE; 2298 2299 return map_size; 2300 } 2301 2302 int 2303 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col) 2304 { 2305 RF_ComponentLabel_t *clabel; 2306 2307 clabel = raidget_component_label(raidPtr, col); 2308 clabel->clean = RF_RAID_CLEAN; 2309 raidflush_component_label(raidPtr, col); 2310 return(0); 2311 } 2312 2313 2314 int 2315 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col) 2316 { 2317 RF_ComponentLabel_t *clabel; 2318 2319 clabel = raidget_component_label(raidPtr, col); 2320 clabel->clean = RF_RAID_DIRTY; 2321 raidflush_component_label(raidPtr, col); 2322 return(0); 2323 } 2324 2325 int 2326 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2327 { 2328 KASSERT(raidPtr->bytesPerSector); 2329 2330 return raidread_component_label(raidPtr->bytesPerSector, 2331 raidPtr->Disks[col].dev, 2332 raidPtr->raid_cinfo[col].ci_vp, 2333 &raidPtr->raid_cinfo[col].ci_label); 2334 } 2335 2336 RF_ComponentLabel_t * 2337 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2338 { 2339 return &raidPtr->raid_cinfo[col].ci_label; 2340 } 2341 2342 int 2343 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2344 { 2345 RF_ComponentLabel_t *label; 2346 2347 label = &raidPtr->raid_cinfo[col].ci_label; 2348 label->mod_counter = raidPtr->mod_counter; 2349 #ifndef RF_NO_PARITY_MAP 2350 label->parity_map_modcount = label->mod_counter; 2351 #endif 2352 return raidwrite_component_label(raidPtr->bytesPerSector, 2353 raidPtr->Disks[col].dev, 2354 raidPtr->raid_cinfo[col].ci_vp, label); 2355 } 2356 2357 /* 2358 * Swap the label endianness. 2359 * 2360 * Everything in the component label is 4-byte-swapped except the version, 2361 * which is kept in the byte-swapped version at all times, and indicates 2362 * for the writer that a swap is necessary. 2363 * 2364 * For reads it is expected that out_label == clabel, but writes expect 2365 * separate labels so only the re-swapped label is written out to disk, 2366 * leaving the swapped-except-version internally. 2367 * 2368 * Only support swapping label version 2. 
2369 */ 2370 static void 2371 rf_swap_label(RF_ComponentLabel_t *clabel, RF_ComponentLabel_t *out_label) 2372 { 2373 int *in, *out, *in_last; 2374 2375 KASSERT(clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)); 2376 2377 /* Don't swap the label, but do copy it. */ 2378 out_label->version = clabel->version; 2379 2380 in = &clabel->serial_number; 2381 in_last = &clabel->future_use2[42]; 2382 out = &out_label->serial_number; 2383 2384 for (; in < in_last; in++, out++) 2385 *out = bswap32(*in); 2386 } 2387 2388 static int 2389 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp, 2390 RF_ComponentLabel_t *clabel) 2391 { 2392 int error; 2393 2394 error = raidread_component_area(dev, b_vp, clabel, 2395 sizeof(RF_ComponentLabel_t), 2396 rf_component_info_offset(), 2397 rf_component_info_size(secsize)); 2398 2399 if (error == 0 && 2400 clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) { 2401 rf_swap_label(clabel, clabel); 2402 } 2403 2404 return error; 2405 } 2406 2407 /* ARGSUSED */ 2408 static int 2409 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data, 2410 size_t msize, daddr_t offset, daddr_t dsize) 2411 { 2412 struct buf *bp; 2413 int error; 2414 2415 /* XXX should probably ensure that we don't try to do this if 2416 someone has changed rf_protected_sectors. */ 2417 2418 if (b_vp == NULL) { 2419 /* For whatever reason, this component is not valid. 2420 Don't try to read a component label from it. */ 2421 return(EINVAL); 2422 } 2423 2424 /* get a block of the appropriate size... */ 2425 bp = geteblk((int)dsize); 2426 bp->b_dev = dev; 2427 2428 /* get our ducks in a row for the read */ 2429 bp->b_blkno = offset / DEV_BSIZE; 2430 bp->b_bcount = dsize; 2431 bp->b_flags |= B_READ; 2432 bp->b_resid = dsize; 2433 2434 bdev_strategy(bp); 2435 error = biowait(bp); 2436 2437 if (!error) { 2438 memcpy(data, bp->b_data, msize); 2439 } 2440 2441 brelse(bp, 0); 2442 return(error); 2443 } 2444 2445 static int 2446 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp, 2447 RF_ComponentLabel_t *clabel) 2448 { 2449 RF_ComponentLabel_t *clabel_write = clabel; 2450 RF_ComponentLabel_t lclabel; 2451 int error; 2452 2453 if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) { 2454 clabel_write = &lclabel; 2455 rf_swap_label(clabel, clabel_write); 2456 } 2457 error = raidwrite_component_area(dev, b_vp, clabel_write, 2458 sizeof(RF_ComponentLabel_t), 2459 rf_component_info_offset(), 2460 rf_component_info_size(secsize), 0); 2461 2462 return error; 2463 } 2464 2465 /* ARGSUSED */ 2466 static int 2467 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data, 2468 size_t msize, daddr_t offset, daddr_t dsize, int asyncp) 2469 { 2470 struct buf *bp; 2471 int error; 2472 2473 /* get a block of the appropriate size... */ 2474 bp = geteblk((int)dsize); 2475 bp->b_dev = dev; 2476 2477 /* get our ducks in a row for the write */ 2478 bp->b_blkno = offset / DEV_BSIZE; 2479 bp->b_bcount = dsize; 2480 bp->b_flags |= B_WRITE | (asyncp ? 
B_ASYNC : 0); 2481 bp->b_resid = dsize; 2482 2483 memset(bp->b_data, 0, dsize); 2484 memcpy(bp->b_data, data, msize); 2485 2486 bdev_strategy(bp); 2487 if (asyncp) 2488 return 0; 2489 error = biowait(bp); 2490 brelse(bp, 0); 2491 if (error) { 2492 #if 1 2493 printf("Failed to write RAID component info!\n"); 2494 #endif 2495 } 2496 2497 return(error); 2498 } 2499 2500 void 2501 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map) 2502 { 2503 int c; 2504 2505 for (c = 0; c < raidPtr->numCol; c++) { 2506 /* Skip dead disks. */ 2507 if (RF_DEAD_DISK(raidPtr->Disks[c].status)) 2508 continue; 2509 /* XXXjld: what if an error occurs here? */ 2510 raidwrite_component_area(raidPtr->Disks[c].dev, 2511 raidPtr->raid_cinfo[c].ci_vp, map, 2512 RF_PARITYMAP_NBYTE, 2513 rf_parity_map_offset(raidPtr), 2514 rf_parity_map_size(raidPtr), 0); 2515 } 2516 } 2517 2518 void 2519 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map) 2520 { 2521 struct rf_paritymap_ondisk tmp; 2522 int c,first; 2523 2524 first=1; 2525 for (c = 0; c < raidPtr->numCol; c++) { 2526 /* Skip dead disks. */ 2527 if (RF_DEAD_DISK(raidPtr->Disks[c].status)) 2528 continue; 2529 raidread_component_area(raidPtr->Disks[c].dev, 2530 raidPtr->raid_cinfo[c].ci_vp, &tmp, 2531 RF_PARITYMAP_NBYTE, 2532 rf_parity_map_offset(raidPtr), 2533 rf_parity_map_size(raidPtr)); 2534 if (first) { 2535 memcpy(map, &tmp, sizeof(*map)); 2536 first = 0; 2537 } else { 2538 rf_paritymap_merge(map, &tmp); 2539 } 2540 } 2541 } 2542 2543 void 2544 rf_markalldirty(RF_Raid_t *raidPtr) 2545 { 2546 RF_ComponentLabel_t *clabel; 2547 int sparecol; 2548 int c; 2549 int j; 2550 int scol = -1; 2551 2552 raidPtr->mod_counter++; 2553 for (c = 0; c < raidPtr->numCol; c++) { 2554 /* we don't want to touch (at all) a disk that has 2555 failed */ 2556 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) { 2557 clabel = raidget_component_label(raidPtr, c); 2558 if (clabel->status == rf_ds_spared) { 2559 /* XXX do something special... 2560 but whatever you do, don't 2561 try to access it!! */ 2562 } else { 2563 raidmarkdirty(raidPtr, c); 2564 } 2565 } 2566 } 2567 2568 for( c = 0; c < raidPtr->numSpare ; c++) { 2569 sparecol = raidPtr->numCol + c; 2570 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 2571 /* 2572 2573 we claim this disk is "optimal" if it's 2574 rf_ds_used_spare, as that means it should be 2575 directly substitutable for the disk it replaced. 2576 We note that too... 2577 2578 */ 2579 2580 for(j=0;j<raidPtr->numCol;j++) { 2581 if (raidPtr->Disks[j].spareCol == sparecol) { 2582 scol = j; 2583 break; 2584 } 2585 } 2586 2587 clabel = raidget_component_label(raidPtr, sparecol); 2588 /* make sure status is noted */ 2589 2590 raid_init_component_label(raidPtr, clabel); 2591 2592 clabel->row = 0; 2593 clabel->column = scol; 2594 /* Note: we *don't* change status from rf_ds_used_spare 2595 to rf_ds_optimal */ 2596 /* clabel.status = rf_ds_optimal; */ 2597 2598 raidmarkdirty(raidPtr, sparecol); 2599 } 2600 } 2601 } 2602 2603 2604 void 2605 rf_update_component_labels(RF_Raid_t *raidPtr, int final) 2606 { 2607 RF_ComponentLabel_t *clabel; 2608 int sparecol; 2609 int c; 2610 int j; 2611 int scol; 2612 struct raid_softc *rs = raidPtr->softc; 2613 2614 scol = -1; 2615 2616 /* XXX should do extra checks to make sure things really are clean, 2617 rather than blindly setting the clean bit... 
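   (At present the only safeguard is parity_good: below, components are
   marked clean only on a RF_FINAL_COMPONENT_UPDATE and only while
   parity_good == RF_RAID_CLEAN.)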
*/ 2618 2619 raidPtr->mod_counter++; 2620 2621 for (c = 0; c < raidPtr->numCol; c++) { 2622 if (raidPtr->Disks[c].status == rf_ds_optimal) { 2623 clabel = raidget_component_label(raidPtr, c); 2624 /* make sure status is noted */ 2625 clabel->status = rf_ds_optimal; 2626 2627 /* note what unit we are configured as */ 2628 if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0) 2629 clabel->last_unit = raidPtr->raidid; 2630 2631 raidflush_component_label(raidPtr, c); 2632 if (final == RF_FINAL_COMPONENT_UPDATE) { 2633 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2634 raidmarkclean(raidPtr, c); 2635 } 2636 } 2637 } 2638 /* else we don't touch it.. */ 2639 } 2640 2641 for( c = 0; c < raidPtr->numSpare ; c++) { 2642 sparecol = raidPtr->numCol + c; 2643 /* Need to ensure that the reconstruct actually completed! */ 2644 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 2645 /* 2646 2647 we claim this disk is "optimal" if it's 2648 rf_ds_used_spare, as that means it should be 2649 directly substitutable for the disk it replaced. 2650 We note that too... 2651 2652 */ 2653 2654 for(j=0;j<raidPtr->numCol;j++) { 2655 if (raidPtr->Disks[j].spareCol == sparecol) { 2656 scol = j; 2657 break; 2658 } 2659 } 2660 2661 /* XXX shouldn't *really* need this... */ 2662 clabel = raidget_component_label(raidPtr, sparecol); 2663 /* make sure status is noted */ 2664 2665 raid_init_component_label(raidPtr, clabel); 2666 2667 clabel->column = scol; 2668 clabel->status = rf_ds_optimal; 2669 if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0) 2670 clabel->last_unit = raidPtr->raidid; 2671 2672 raidflush_component_label(raidPtr, sparecol); 2673 if (final == RF_FINAL_COMPONENT_UPDATE) { 2674 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2675 raidmarkclean(raidPtr, sparecol); 2676 } 2677 } 2678 } 2679 } 2680 } 2681 2682 void 2683 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured) 2684 { 2685 2686 if (vp != NULL) { 2687 if (auto_configured == 1) { 2688 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2689 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 2690 vput(vp); 2691 2692 } else { 2693 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred); 2694 } 2695 } 2696 } 2697 2698 2699 void 2700 rf_UnconfigureVnodes(RF_Raid_t *raidPtr) 2701 { 2702 int r,c; 2703 struct vnode *vp; 2704 int acd; 2705 2706 2707 /* We take this opportunity to close the vnodes like we should.. */ 2708 2709 for (c = 0; c < raidPtr->numCol; c++) { 2710 vp = raidPtr->raid_cinfo[c].ci_vp; 2711 acd = raidPtr->Disks[c].auto_configured; 2712 rf_close_component(raidPtr, vp, acd); 2713 raidPtr->raid_cinfo[c].ci_vp = NULL; 2714 raidPtr->Disks[c].auto_configured = 0; 2715 } 2716 2717 for (r = 0; r < raidPtr->numSpare; r++) { 2718 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp; 2719 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured; 2720 rf_close_component(raidPtr, vp, acd); 2721 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL; 2722 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0; 2723 } 2724 } 2725 2726 2727 static void 2728 rf_ReconThread(struct rf_recon_req_internal *req) 2729 { 2730 int s; 2731 RF_Raid_t *raidPtr; 2732 2733 s = splbio(); 2734 raidPtr = (RF_Raid_t *) req->raidPtr; 2735 raidPtr->recon_in_progress = 1; 2736 2737 if (req->flags & RF_FDFLAGS_RECON_FORCE) { 2738 raidPtr->forceRecon = 1; 2739 } 2740 2741 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col, 2742 ((req->flags & RF_FDFLAGS_RECON) ? 
1 : 0)); 2743 2744 if (req->flags & RF_FDFLAGS_RECON_FORCE) { 2745 raidPtr->forceRecon = 0; 2746 } 2747 2748 RF_Free(req, sizeof(*req)); 2749 2750 raidPtr->recon_in_progress = 0; 2751 splx(s); 2752 2753 /* That's all... */ 2754 kthread_exit(0); /* does not return */ 2755 } 2756 2757 static void 2758 rf_RewriteParityThread(RF_Raid_t *raidPtr) 2759 { 2760 int retcode; 2761 int s; 2762 2763 raidPtr->parity_rewrite_stripes_done = 0; 2764 raidPtr->parity_rewrite_in_progress = 1; 2765 s = splbio(); 2766 retcode = rf_RewriteParity(raidPtr); 2767 splx(s); 2768 if (retcode) { 2769 printf("raid%d: Error re-writing parity (%d)!\n", 2770 raidPtr->raidid, retcode); 2771 } else { 2772 /* set the clean bit! If we shutdown correctly, 2773 the clean bit on each component label will get 2774 set */ 2775 raidPtr->parity_good = RF_RAID_CLEAN; 2776 } 2777 raidPtr->parity_rewrite_in_progress = 0; 2778 2779 /* Anyone waiting for us to stop? If so, inform them... */ 2780 if (raidPtr->waitShutdown) { 2781 rf_lock_mutex2(raidPtr->rad_lock); 2782 cv_broadcast(&raidPtr->parity_rewrite_cv); 2783 rf_unlock_mutex2(raidPtr->rad_lock); 2784 } 2785 2786 /* That's all... */ 2787 kthread_exit(0); /* does not return */ 2788 } 2789 2790 2791 static void 2792 rf_CopybackThread(RF_Raid_t *raidPtr) 2793 { 2794 int s; 2795 2796 raidPtr->copyback_in_progress = 1; 2797 s = splbio(); 2798 rf_CopybackReconstructedData(raidPtr); 2799 splx(s); 2800 raidPtr->copyback_in_progress = 0; 2801 2802 /* That's all... */ 2803 kthread_exit(0); /* does not return */ 2804 } 2805 2806 2807 static void 2808 rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req) 2809 { 2810 int s; 2811 RF_Raid_t *raidPtr; 2812 2813 s = splbio(); 2814 raidPtr = req->raidPtr; 2815 raidPtr->recon_in_progress = 1; 2816 2817 if (req->flags & RF_FDFLAGS_RECON_FORCE) { 2818 raidPtr->forceRecon = 1; 2819 } 2820 2821 rf_ReconstructInPlace(raidPtr, req->col); 2822 2823 if (req->flags & RF_FDFLAGS_RECON_FORCE) { 2824 raidPtr->forceRecon = 0; 2825 } 2826 2827 RF_Free(req, sizeof(*req)); 2828 raidPtr->recon_in_progress = 0; 2829 splx(s); 2830 2831 /* That's all... */ 2832 kthread_exit(0); /* does not return */ 2833 } 2834 2835 static RF_AutoConfig_t * 2836 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp, 2837 const char *cname, RF_SectorCount_t size, uint64_t numsecs, 2838 unsigned secsize) 2839 { 2840 int good_one = 0; 2841 RF_ComponentLabel_t *clabel; 2842 RF_AutoConfig_t *ac; 2843 2844 clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_WAITOK); 2845 2846 if (!raidread_component_label(secsize, dev, vp, clabel)) { 2847 /* Got the label. Does it look reasonable? */ 2848 if (rf_reasonable_label(clabel, numsecs) && 2849 (rf_component_label_partitionsize(clabel) <= size)) { 2850 #ifdef DEBUG 2851 printf("Component on: %s: %llu\n", 2852 cname, (unsigned long long)size); 2853 rf_print_component_label(clabel); 2854 #endif 2855 /* if it's reasonable, add it, else ignore it. 
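   A reasonable component gets prepended to ac_list with its label kept
   around; otherwise the label is freed and the vnode closed in the
   !good_one path below.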
*/ 2856 ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME, 2857 M_WAITOK); 2858 strlcpy(ac->devname, cname, sizeof(ac->devname)); 2859 ac->dev = dev; 2860 ac->vp = vp; 2861 ac->clabel = clabel; 2862 ac->next = ac_list; 2863 ac_list = ac; 2864 good_one = 1; 2865 } 2866 } 2867 if (!good_one) { 2868 /* cleanup */ 2869 free(clabel, M_RAIDFRAME); 2870 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2871 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 2872 vput(vp); 2873 } 2874 return ac_list; 2875 } 2876 2877 static RF_AutoConfig_t * 2878 rf_find_raid_components(void) 2879 { 2880 struct vnode *vp; 2881 struct disklabel label; 2882 device_t dv; 2883 deviter_t di; 2884 dev_t dev; 2885 int bmajor, bminor, wedge, rf_part_found; 2886 int error; 2887 int i; 2888 RF_AutoConfig_t *ac_list; 2889 uint64_t numsecs; 2890 unsigned secsize; 2891 int dowedges; 2892 2893 /* initialize the AutoConfig list */ 2894 ac_list = NULL; 2895 2896 /* 2897 * we begin by trolling through *all* the devices on the system *twice*: 2898 * first we scan for wedges, second for other devices. This avoids 2899 * using a raw partition instead of a wedge that covers the whole disk. 2900 */ 2901 2902 for (dowedges=1; dowedges>=0; --dowedges) { 2903 for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL; 2904 dv = deviter_next(&di)) { 2905 2906 /* we are only interested in disks */ 2907 if (device_class(dv) != DV_DISK) 2908 continue; 2909 2910 /* we don't care about floppies */ 2911 if (device_is_a(dv, "fd")) { 2912 continue; 2913 } 2914 2915 /* we don't care about CDs. */ 2916 if (device_is_a(dv, "cd")) { 2917 continue; 2918 } 2919 2920 /* we don't care about md. */ 2921 if (device_is_a(dv, "md")) { 2922 continue; 2923 } 2924 2925 /* hdfd is the Atari/Hades floppy driver */ 2926 if (device_is_a(dv, "hdfd")) { 2927 continue; 2928 } 2929 2930 /* fdisa is the Atari/Milan floppy driver */ 2931 if (device_is_a(dv, "fdisa")) { 2932 continue; 2933 } 2934 2935 /* we don't care about spiflash */ 2936 if (device_is_a(dv, "spiflash")) { 2937 continue; 2938 } 2939 2940 /* are we in the wedges pass ? */ 2941 wedge = device_is_a(dv, "dk"); 2942 if (wedge != dowedges) { 2943 continue; 2944 } 2945 2946 /* need to find the device_name_to_block_device_major stuff */ 2947 bmajor = devsw_name2blk(device_xname(dv), NULL, 0); 2948 2949 rf_part_found = 0; /*No raid partition as yet*/ 2950 2951 /* get a vnode for the raw partition of this disk */ 2952 bminor = minor(device_unit(dv)); 2953 dev = wedge ? makedev(bmajor, bminor) : 2954 MAKEDISKDEV(bmajor, bminor, RAW_PART); 2955 if (bdevvp(dev, &vp)) 2956 panic("RAID can't alloc vnode"); 2957 2958 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2959 error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED); 2960 2961 if (error) { 2962 /* "Who cares." Continue looking 2963 for something that exists*/ 2964 vput(vp); 2965 continue; 2966 } 2967 2968 error = getdisksize(vp, &numsecs, &secsize); 2969 if (error) { 2970 /* 2971 * Pseudo devices like vnd and cgd can be 2972 * opened but may still need some configuration. 2973 * Ignore these quietly.
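 * (Only ENXIO is treated as that quiet case; any other getdisksize()
 * error is logged before the component is skipped.)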
2974 */ 2975 if (error != ENXIO) 2976 printf("RAIDframe: can't get disk size" 2977 " for dev %s (%d)\n", 2978 device_xname(dv), error); 2979 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 2980 vput(vp); 2981 continue; 2982 } 2983 if (wedge) { 2984 struct dkwedge_info dkw; 2985 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, 2986 NOCRED); 2987 if (error) { 2988 printf("RAIDframe: can't get wedge info for " 2989 "dev %s (%d)\n", device_xname(dv), error); 2990 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 2991 vput(vp); 2992 continue; 2993 } 2994 2995 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) { 2996 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 2997 vput(vp); 2998 continue; 2999 } 3000 3001 VOP_UNLOCK(vp); 3002 ac_list = rf_get_component(ac_list, dev, vp, 3003 device_xname(dv), dkw.dkw_size, numsecs, secsize); 3004 rf_part_found = 1; /*There is a raid component on this disk*/ 3005 continue; 3006 } 3007 3008 /* Ok, the disk exists. Go get the disklabel. */ 3009 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED); 3010 if (error) { 3011 /* 3012 * XXX can't happen - open() would 3013 * have errored out (or faked up one) 3014 */ 3015 if (error != ENOTTY) 3016 printf("RAIDframe: can't get label for dev " 3017 "%s (%d)\n", device_xname(dv), error); 3018 } 3019 3020 /* don't need this any more. We'll allocate it again 3021 a little later if we really do... */ 3022 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3023 vput(vp); 3024 3025 if (error) 3026 continue; 3027 3028 rf_part_found = 0; /*No raid partitions yet*/ 3029 for (i = 0; i < label.d_npartitions; i++) { 3030 char cname[sizeof(ac_list->devname)]; 3031 3032 /* We only support partitions marked as RAID */ 3033 if (label.d_partitions[i].p_fstype != FS_RAID) 3034 continue; 3035 3036 dev = MAKEDISKDEV(bmajor, device_unit(dv), i); 3037 if (bdevvp(dev, &vp)) 3038 panic("RAID can't alloc vnode"); 3039 3040 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3041 error = VOP_OPEN(vp, FREAD, NOCRED); 3042 if (error) { 3043 /* Not quite a 'whatever'. In 3044 * this situation we know 3045 * there is a FS_RAID 3046 * partition, but we can't 3047 * open it. The most likely 3048 * reason is that the 3049 * partition is already in 3050 * use by another RAID set. 3051 * So note that we've already 3052 * found a partition on this 3053 * disk so we don't attempt 3054 * to use the raw disk later. */ 3055 rf_part_found = 1; 3056 vput(vp); 3057 continue; 3058 } 3059 VOP_UNLOCK(vp); 3060 snprintf(cname, sizeof(cname), "%s%c", 3061 device_xname(dv), 'a' + i); 3062 ac_list = rf_get_component(ac_list, dev, vp, cname, 3063 label.d_partitions[i].p_size, numsecs, secsize); 3064 rf_part_found = 1; /*There is at least one raid partition on this disk*/ 3065 } 3066 3067 /* 3068 *If there is no raid component on this disk, either in a 3069 *disklabel or inside a wedge, check the raw partition as well, 3070 *as it is possible to configure raid components on raw disk 3071 *devices. 3072 */ 3073 3074 if (!rf_part_found) { 3075 char cname[sizeof(ac_list->devname)]; 3076 3077 dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART); 3078 if (bdevvp(dev, &vp)) 3079 panic("RAID can't alloc vnode"); 3080 3081 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3082 3083 error = VOP_OPEN(vp, FREAD, NOCRED); 3084 if (error) { 3085 /* Whatever... 
*/ 3086 vput(vp); 3087 continue; 3088 } 3089 VOP_UNLOCK(vp); 3090 snprintf(cname, sizeof(cname), "%s%c", 3091 device_xname(dv), 'a' + RAW_PART); 3092 ac_list = rf_get_component(ac_list, dev, vp, cname, 3093 label.d_partitions[RAW_PART].p_size, numsecs, secsize); 3094 } 3095 } 3096 deviter_release(&di); 3097 } 3098 return ac_list; 3099 } 3100 3101 int 3102 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs) 3103 { 3104 3105 if ((clabel->version==RF_COMPONENT_LABEL_VERSION_1 || 3106 clabel->version==RF_COMPONENT_LABEL_VERSION || 3107 clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) && 3108 (clabel->clean == RF_RAID_CLEAN || 3109 clabel->clean == RF_RAID_DIRTY) && 3110 clabel->row >=0 && 3111 clabel->column >= 0 && 3112 clabel->num_rows > 0 && 3113 clabel->num_columns > 0 && 3114 clabel->row < clabel->num_rows && 3115 clabel->column < clabel->num_columns && 3116 clabel->blockSize > 0 && 3117 /* 3118 * numBlocksHi may contain garbage, but it is ok since 3119 * the type is unsigned. If it is really garbage, 3120 * rf_fix_old_label_size() will fix it. 3121 */ 3122 rf_component_label_numblocks(clabel) > 0) { 3123 /* 3124 * label looks reasonable enough... 3125 * let's make sure it has no old garbage. 3126 */ 3127 if (numsecs) 3128 rf_fix_old_label_size(clabel, numsecs); 3129 return(1); 3130 } 3131 return(0); 3132 } 3133 3134 3135 /* 3136 * For reasons yet unknown, some old component labels have garbage in 3137 * the newer numBlocksHi region, and this causes lossage. Since those 3138 * disks will also have numsecs set to less than 32 bits of sectors, 3139 * we can determine when this corruption has occurred, and fix it. 3140 * 3141 * The exact same problem, with the same unknown reason, happens to 3142 * the partitionSizeHi member as well. 3143 */ 3144 static void 3145 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs) 3146 { 3147 3148 if (numsecs < ((uint64_t)1 << 32)) { 3149 if (clabel->numBlocksHi) { 3150 printf("WARNING: total sectors < 32 bits, yet " 3151 "numBlocksHi set\n" 3152 "WARNING: resetting numBlocksHi to zero.\n"); 3153 clabel->numBlocksHi = 0; 3154 } 3155 3156 if (clabel->partitionSizeHi) { 3157 printf("WARNING: total sectors < 32 bits, yet " 3158 "partitionSizeHi set\n" 3159 "WARNING: resetting partitionSizeHi to zero.\n"); 3160 clabel->partitionSizeHi = 0; 3161 } 3162 } 3163 } 3164 3165 3166 #ifdef DEBUG 3167 void 3168 rf_print_component_label(RF_ComponentLabel_t *clabel) 3169 { 3170 uint64_t numBlocks; 3171 static const char *rp[] = { 3172 "No", "Force", "Soft", "*invalid*" 3173 }; 3174 3175 3176 numBlocks = rf_component_label_numblocks(clabel); 3177 3178 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", 3179 clabel->row, clabel->column, 3180 clabel->num_rows, clabel->num_columns); 3181 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 3182 clabel->version, clabel->serial_number, 3183 clabel->mod_counter); 3184 printf(" Clean: %s Status: %d\n", 3185 clabel->clean ? "Yes" : "No", clabel->status); 3186 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n", 3187 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); 3188 printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n", 3189 (char) clabel->parityConfig, clabel->blockSize, numBlocks); 3190 printf(" Autoconfig: %s\n", clabel->autoconfigure ? 
"Yes" : "No"); 3191 printf(" Root partition: %s\n", rp[clabel->root_partition & 3]); 3192 printf(" Last configured as: raid%d\n", clabel->last_unit); 3193 #if 0 3194 printf(" Config order: %d\n", clabel->config_order); 3195 #endif 3196 3197 } 3198 #endif 3199 3200 static RF_ConfigSet_t * 3201 rf_create_auto_sets(RF_AutoConfig_t *ac_list) 3202 { 3203 RF_AutoConfig_t *ac; 3204 RF_ConfigSet_t *config_sets; 3205 RF_ConfigSet_t *cset; 3206 RF_AutoConfig_t *ac_next; 3207 3208 3209 config_sets = NULL; 3210 3211 /* Go through the AutoConfig list, and figure out which components 3212 belong to what sets. */ 3213 ac = ac_list; 3214 while(ac!=NULL) { 3215 /* we're going to putz with ac->next, so save it here 3216 for use at the end of the loop */ 3217 ac_next = ac->next; 3218 3219 if (config_sets == NULL) { 3220 /* will need at least this one... */ 3221 config_sets = malloc(sizeof(RF_ConfigSet_t), 3222 M_RAIDFRAME, M_WAITOK); 3223 /* this one is easy :) */ 3224 config_sets->ac = ac; 3225 config_sets->next = NULL; 3226 config_sets->rootable = 0; 3227 ac->next = NULL; 3228 } else { 3229 /* which set does this component fit into? */ 3230 cset = config_sets; 3231 while(cset!=NULL) { 3232 if (rf_does_it_fit(cset, ac)) { 3233 /* looks like it matches... */ 3234 ac->next = cset->ac; 3235 cset->ac = ac; 3236 break; 3237 } 3238 cset = cset->next; 3239 } 3240 if (cset==NULL) { 3241 /* didn't find a match above... new set..*/ 3242 cset = malloc(sizeof(RF_ConfigSet_t), 3243 M_RAIDFRAME, M_WAITOK); 3244 cset->ac = ac; 3245 ac->next = NULL; 3246 cset->next = config_sets; 3247 cset->rootable = 0; 3248 config_sets = cset; 3249 } 3250 } 3251 ac = ac_next; 3252 } 3253 3254 3255 return(config_sets); 3256 } 3257 3258 static int 3259 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac) 3260 { 3261 RF_ComponentLabel_t *clabel1, *clabel2; 3262 3263 /* If this one matches the *first* one in the set, that's good 3264 enough, since the other members of the set would have been 3265 through here too... */ 3266 /* note that we are not checking partitionSize here.. 3267 3268 Note that we are also not checking the mod_counters here. 3269 If everything else matches except the mod_counter, that's 3270 good enough for this test. We will deal with the mod_counters 3271 a little later in the autoconfiguration process. 3272 3273 (clabel1->mod_counter == clabel2->mod_counter) && 3274 3275 The reason we don't check for this is that failed disks 3276 will have lower modification counts. If those disks are 3277 not added to the set they used to belong to, then they will 3278 form their own set, which may result in 2 different sets, 3279 for example, competing to be configured at raid0, and 3280 perhaps competing to be the root filesystem set. If the 3281 wrong ones get configured, or both attempt to become /, 3282 weird behaviour and or serious lossage will occur. Thus we 3283 need to bring them into the fold here, and kick them out at 3284 a later point. 
3285 3286 */ 3287 3288 clabel1 = cset->ac->clabel; 3289 clabel2 = ac->clabel; 3290 if ((clabel1->version == clabel2->version) && 3291 (clabel1->serial_number == clabel2->serial_number) && 3292 (clabel1->num_rows == clabel2->num_rows) && 3293 (clabel1->num_columns == clabel2->num_columns) && 3294 (clabel1->sectPerSU == clabel2->sectPerSU) && 3295 (clabel1->SUsPerPU == clabel2->SUsPerPU) && 3296 (clabel1->SUsPerRU == clabel2->SUsPerRU) && 3297 (clabel1->parityConfig == clabel2->parityConfig) && 3298 (clabel1->maxOutstanding == clabel2->maxOutstanding) && 3299 (clabel1->blockSize == clabel2->blockSize) && 3300 rf_component_label_numblocks(clabel1) == 3301 rf_component_label_numblocks(clabel2) && 3302 (clabel1->autoconfigure == clabel2->autoconfigure) && 3303 (clabel1->root_partition == clabel2->root_partition) && 3304 (clabel1->last_unit == clabel2->last_unit) && 3305 (clabel1->config_order == clabel2->config_order)) { 3306 /* if it gets here, it almost *has* to be a match */ 3307 } else { 3308 /* it's not consistent with somebody in the set.. 3309 punt */ 3310 return(0); 3311 } 3312 /* all was fine.. it must fit... */ 3313 return(1); 3314 } 3315 3316 static int 3317 rf_have_enough_components(RF_ConfigSet_t *cset) 3318 { 3319 RF_AutoConfig_t *ac; 3320 RF_AutoConfig_t *auto_config; 3321 RF_ComponentLabel_t *clabel; 3322 int c; 3323 int num_cols; 3324 int num_missing; 3325 int mod_counter; 3326 int mod_counter_found; 3327 int even_pair_failed; 3328 char parity_type; 3329 3330 3331 /* check to see that we have enough 'live' components 3332 of this set. If so, we can configure it if necessary */ 3333 3334 num_cols = cset->ac->clabel->num_columns; 3335 parity_type = cset->ac->clabel->parityConfig; 3336 3337 /* XXX Check for duplicate components!?!?!? */ 3338 3339 /* Determine what the mod_counter is supposed to be for this set. */ 3340 3341 mod_counter_found = 0; 3342 mod_counter = 0; 3343 ac = cset->ac; 3344 while(ac!=NULL) { 3345 if (mod_counter_found==0) { 3346 mod_counter = ac->clabel->mod_counter; 3347 mod_counter_found = 1; 3348 } else { 3349 if (ac->clabel->mod_counter > mod_counter) { 3350 mod_counter = ac->clabel->mod_counter; 3351 } 3352 } 3353 ac = ac->next; 3354 } 3355 3356 num_missing = 0; 3357 auto_config = cset->ac; 3358 3359 even_pair_failed = 0; 3360 for(c=0; c<num_cols; c++) { 3361 ac = auto_config; 3362 while(ac!=NULL) { 3363 if ((ac->clabel->column == c) && 3364 (ac->clabel->mod_counter == mod_counter)) { 3365 /* it's this one... */ 3366 #ifdef DEBUG 3367 printf("Found: %s at %d\n", 3368 ac->devname,c); 3369 #endif 3370 break; 3371 } 3372 ac=ac->next; 3373 } 3374 if (ac==NULL) { 3375 /* Didn't find one here! */ 3376 /* special case for RAID 1, especially 3377 where there are more than 2 3378 components (where RAIDframe treats 3379 things a little differently :( ) */ 3380 if (parity_type == '1') { 3381 if (c%2 == 0) { /* even component */ 3382 even_pair_failed = 1; 3383 } else { /* odd component. If 3384 we're failed, and 3385 so is the even 3386 component, it's 3387 "Good Night, Charlie" */ 3388 if (even_pair_failed == 1) { 3389 return(0); 3390 } 3391 } 3392 } else { 3393 /* normal accounting */ 3394 num_missing++; 3395 } 3396 } 3397 if ((parity_type == '1') && (c%2 == 1)) { 3398 /* Just did the odd component of a pair, and we 3399 didn't bail.. reset the even_pair_failed flag, 3400 and go on to the next component....
*/ 3401 even_pair_failed = 0; 3402 } 3403 } 3404 3405 clabel = cset->ac->clabel; 3406 3407 if (((clabel->parityConfig == '0') && (num_missing > 0)) || 3408 ((clabel->parityConfig == '4') && (num_missing > 1)) || 3409 ((clabel->parityConfig == '5') && (num_missing > 1))) { 3410 /* XXX this needs to be made *much* more general */ 3411 /* Too many failures */ 3412 return(0); 3413 } 3414 /* otherwise, all is well, and we've got enough to take a kick 3415 at autoconfiguring this set */ 3416 return(1); 3417 } 3418 3419 static void 3420 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config, 3421 RF_Raid_t *raidPtr) 3422 { 3423 RF_ComponentLabel_t *clabel; 3424 int i; 3425 3426 clabel = ac->clabel; 3427 3428 /* 1. Fill in the common stuff */ 3429 config->numCol = clabel->num_columns; 3430 config->numSpare = 0; /* XXX should this be set here? */ 3431 config->sectPerSU = clabel->sectPerSU; 3432 config->SUsPerPU = clabel->SUsPerPU; 3433 config->SUsPerRU = clabel->SUsPerRU; 3434 config->parityConfig = clabel->parityConfig; 3435 /* XXX... */ 3436 strcpy(config->diskQueueType,"fifo"); 3437 config->maxOutstandingDiskReqs = clabel->maxOutstanding; 3438 config->layoutSpecificSize = 0; /* XXX ?? */ 3439 3440 while(ac!=NULL) { 3441 /* row/col values will be in range due to the checks 3442 in reasonable_label() */ 3443 strcpy(config->devnames[0][ac->clabel->column], 3444 ac->devname); 3445 ac = ac->next; 3446 } 3447 3448 for(i=0;i<RF_MAXDBGV;i++) { 3449 config->debugVars[i][0] = 0; 3450 } 3451 } 3452 3453 static int 3454 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value) 3455 { 3456 RF_ComponentLabel_t *clabel; 3457 int column; 3458 int sparecol; 3459 3460 raidPtr->autoconfigure = new_value; 3461 3462 for(column=0; column<raidPtr->numCol; column++) { 3463 if (raidPtr->Disks[column].status == rf_ds_optimal) { 3464 clabel = raidget_component_label(raidPtr, column); 3465 clabel->autoconfigure = new_value; 3466 raidflush_component_label(raidPtr, column); 3467 } 3468 } 3469 for(column = 0; column < raidPtr->numSpare ; column++) { 3470 sparecol = raidPtr->numCol + column; 3471 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3472 clabel = raidget_component_label(raidPtr, sparecol); 3473 clabel->autoconfigure = new_value; 3474 raidflush_component_label(raidPtr, sparecol); 3475 } 3476 } 3477 return(new_value); 3478 } 3479 3480 static int 3481 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value) 3482 { 3483 RF_ComponentLabel_t *clabel; 3484 int column; 3485 int sparecol; 3486 3487 raidPtr->root_partition = new_value; 3488 for(column=0; column<raidPtr->numCol; column++) { 3489 if (raidPtr->Disks[column].status == rf_ds_optimal) { 3490 clabel = raidget_component_label(raidPtr, column); 3491 clabel->root_partition = new_value; 3492 raidflush_component_label(raidPtr, column); 3493 } 3494 } 3495 for(column = 0; column < raidPtr->numSpare ; column++) { 3496 sparecol = raidPtr->numCol + column; 3497 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3498 clabel = raidget_component_label(raidPtr, sparecol); 3499 clabel->root_partition = new_value; 3500 raidflush_component_label(raidPtr, sparecol); 3501 } 3502 } 3503 return(new_value); 3504 } 3505 3506 static void 3507 rf_release_all_vps(RF_ConfigSet_t *cset) 3508 { 3509 RF_AutoConfig_t *ac; 3510 3511 ac = cset->ac; 3512 while(ac!=NULL) { 3513 /* Close the vp, and give it back */ 3514 if (ac->vp) { 3515 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); 3516 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED); 3517 vput(ac->vp); 3518 ac->vp = NULL; 3519 } 3520 
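		/* the component label itself stays allocated here;
		   rf_cleanup_config_set() is what frees it */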
ac = ac->next; 3521 } 3522 } 3523 3524 3525 static void 3526 rf_cleanup_config_set(RF_ConfigSet_t *cset) 3527 { 3528 RF_AutoConfig_t *ac; 3529 RF_AutoConfig_t *next_ac; 3530 3531 ac = cset->ac; 3532 while(ac!=NULL) { 3533 next_ac = ac->next; 3534 /* nuke the label */ 3535 free(ac->clabel, M_RAIDFRAME); 3536 /* cleanup the config structure */ 3537 free(ac, M_RAIDFRAME); 3538 /* "next.." */ 3539 ac = next_ac; 3540 } 3541 /* and, finally, nuke the config set */ 3542 free(cset, M_RAIDFRAME); 3543 } 3544 3545 3546 void 3547 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel) 3548 { 3549 /* avoid over-writing byteswapped version. */ 3550 if (clabel->version != bswap32(RF_COMPONENT_LABEL_VERSION)) 3551 clabel->version = RF_COMPONENT_LABEL_VERSION; 3552 clabel->serial_number = raidPtr->serial_number; 3553 clabel->mod_counter = raidPtr->mod_counter; 3554 3555 clabel->num_rows = 1; 3556 clabel->num_columns = raidPtr->numCol; 3557 clabel->clean = RF_RAID_DIRTY; /* not clean */ 3558 clabel->status = rf_ds_optimal; /* "It's good!" */ 3559 3560 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; 3561 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU; 3562 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU; 3563 3564 clabel->blockSize = raidPtr->bytesPerSector; 3565 rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk); 3566 3567 /* XXX not portable */ 3568 clabel->parityConfig = raidPtr->Layout.map->parityConfig; 3569 clabel->maxOutstanding = raidPtr->maxOutstanding; 3570 clabel->autoconfigure = raidPtr->autoconfigure; 3571 clabel->root_partition = raidPtr->root_partition; 3572 clabel->last_unit = raidPtr->raidid; 3573 clabel->config_order = raidPtr->config_order; 3574 3575 #ifndef RF_NO_PARITY_MAP 3576 rf_paritymap_init_label(raidPtr->parity_map, clabel); 3577 #endif 3578 } 3579 3580 static struct raid_softc * 3581 rf_auto_config_set(RF_ConfigSet_t *cset) 3582 { 3583 RF_Raid_t *raidPtr; 3584 RF_Config_t *config; 3585 int raidID; 3586 struct raid_softc *sc; 3587 3588 #ifdef DEBUG 3589 printf("RAID autoconfigure\n"); 3590 #endif 3591 3592 /* 1. Create a config structure */ 3593 config = malloc(sizeof(*config), M_RAIDFRAME, M_WAITOK|M_ZERO); 3594 3595 /* 3596 2. Figure out what RAID ID this one is supposed to live at 3597 See if we can get the same RAID dev that it was configured 3598 on last time.. 3599 */ 3600 3601 raidID = cset->ac->clabel->last_unit; 3602 for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0; 3603 sc = raidget(++raidID, false)) 3604 continue; 3605 #ifdef DEBUG 3606 printf("Configuring raid%d:\n",raidID); 3607 #endif 3608 3609 if (sc == NULL) 3610 sc = raidget(raidID, true); 3611 raidPtr = &sc->sc_r; 3612 3613 /* XXX all this stuff should be done SOMEWHERE ELSE! */ 3614 raidPtr->softc = sc; 3615 raidPtr->raidid = raidID; 3616 raidPtr->openings = RAIDOUTSTANDING; 3617 3618 /* 3. Build the configuration structure */ 3619 rf_create_configuration(cset->ac, config, raidPtr); 3620 3621 /* 4. Do the configuration */ 3622 if (rf_Configure(raidPtr, config, cset->ac) == 0) { 3623 raidinit(sc); 3624 3625 rf_markalldirty(raidPtr); 3626 raidPtr->autoconfigure = 1; /* XXX do this here? */ 3627 switch (cset->ac->clabel->root_partition) { 3628 case 1: /* Force Root */ 3629 case 2: /* Soft Root: root when boot partition part of raid */ 3630 /* 3631 * everything configured just fine. Make a note 3632 * that this set is eligible to be root, 3633 * or forced to be root 3634 */ 3635 cset->rootable = cset->ac->clabel->root_partition; 3636 /* XXX do this here? 
*/ 3637 raidPtr->root_partition = cset->rootable; 3638 break; 3639 default: 3640 break; 3641 } 3642 } else { 3643 raidput(sc); 3644 sc = NULL; 3645 } 3646 3647 /* 5. Cleanup */ 3648 free(config, M_RAIDFRAME); 3649 return sc; 3650 } 3651 3652 void 3653 rf_pool_init(RF_Raid_t *raidPtr, char *w_chan, struct pool *p, size_t size, const char *pool_name, 3654 size_t xmin, size_t xmax) 3655 { 3656 3657 /* Format: raid%d_foo */ 3658 snprintf(w_chan, RF_MAX_POOLNAMELEN, "raid%d_%s", raidPtr->raidid, pool_name); 3659 3660 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO); 3661 pool_sethiwat(p, xmax); 3662 pool_prime(p, xmin); 3663 } 3664 3665 3666 /* 3667 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue 3668 * to see if there is IO pending and if that IO could possibly be done 3669 * for a given RAID set. Returns 0 if IO is waiting and can be done, 1 3670 * otherwise. 3671 * 3672 */ 3673 int 3674 rf_buf_queue_check(RF_Raid_t *raidPtr) 3675 { 3676 struct raid_softc *rs; 3677 struct dk_softc *dksc; 3678 3679 rs = raidPtr->softc; 3680 dksc = &rs->sc_dksc; 3681 3682 if ((rs->sc_flags & RAIDF_INITED) == 0) 3683 return 1; 3684 3685 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) { 3686 /* there is work to do */ 3687 return 0; 3688 } 3689 /* default is nothing to do */ 3690 return 1; 3691 } 3692 3693 int 3694 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr) 3695 { 3696 uint64_t numsecs; 3697 unsigned secsize; 3698 int error; 3699 3700 error = getdisksize(vp, &numsecs, &secsize); 3701 if (error == 0) { 3702 diskPtr->blockSize = secsize; 3703 diskPtr->numBlocks = numsecs - rf_protectedSectors; 3704 diskPtr->partitionSize = numsecs; 3705 return 0; 3706 } 3707 return error; 3708 } 3709 3710 static int 3711 raid_match(device_t self, cfdata_t cfdata, void *aux) 3712 { 3713 return 1; 3714 } 3715 3716 static void 3717 raid_attach(device_t parent, device_t self, void *aux) 3718 { 3719 } 3720 3721 3722 static int 3723 raid_detach(device_t self, int flags) 3724 { 3725 int error; 3726 struct raid_softc *rs = raidsoftc(self); 3727 3728 if (rs == NULL) 3729 return ENXIO; 3730 3731 if ((error = raidlock(rs)) != 0) 3732 return error; 3733 3734 error = raid_detach_unlocked(rs); 3735 3736 raidunlock(rs); 3737 3738 /* XXX raid can be referenced here */ 3739 3740 if (error) 3741 return error; 3742 3743 /* Free the softc */ 3744 raidput(rs); 3745 3746 return 0; 3747 } 3748 3749 static void 3750 rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr) 3751 { 3752 struct dk_softc *dksc = &rs->sc_dksc; 3753 struct disk_geom *dg = &dksc->sc_dkdev.dk_geom; 3754 3755 memset(dg, 0, sizeof(*dg)); 3756 3757 dg->dg_secperunit = raidPtr->totalSectors; 3758 dg->dg_secsize = raidPtr->bytesPerSector; 3759 dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe; 3760 dg->dg_ntracks = 4 * raidPtr->numCol; 3761 3762 disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL); 3763 } 3764 3765 /* 3766 * Get cache info for all the components (including spares). 3767 * Returns intersection of all the cache flags of all disks, or first 3768 * error if any encountered. 3769 * XXXfua feature flags can change as spares are added - lock down somehow 3770 */ 3771 static int 3772 rf_get_component_caches(RF_Raid_t *raidPtr, int *data) 3773 { 3774 int c; 3775 int error; 3776 int dkwhole = 0, dkpart; 3777 3778 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) { 3779 /* 3780 * Check any non-dead disk, even when currently being 3781 * reconstructed. 
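 * (A rf_ds_reconstructing component counts as dead for RF_DEAD_DISK(),
 * which is why the explicit extra test below is needed.)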
3782 */ 3783 if (!RF_DEAD_DISK(raidPtr->Disks[c].status) 3784 || raidPtr->Disks[c].status == rf_ds_reconstructing) { 3785 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, 3786 DIOCGCACHE, &dkpart, FREAD, NOCRED); 3787 if (error) { 3788 if (error != ENODEV) { 3789 printf("raid%d: get cache for component %s failed\n", 3790 raidPtr->raidid, 3791 raidPtr->Disks[c].devname); 3792 } 3793 3794 return error; 3795 } 3796 3797 if (c == 0) 3798 dkwhole = dkpart; 3799 else 3800 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart); 3801 } 3802 } 3803 3804 *data = dkwhole; 3805 3806 return 0; 3807 } 3808 3809 /* 3810 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components. 3811 * We end up returning whatever error was returned by the first cache flush 3812 * that fails. 3813 */ 3814 3815 static int 3816 rf_sync_component_cache(RF_Raid_t *raidPtr, int c, int force) 3817 { 3818 int e = 0; 3819 for (int i = 0; i < 5; i++) { 3820 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC, 3821 &force, FWRITE, NOCRED); 3822 if (!e || e == ENODEV) 3823 return e; 3824 printf("raid%d: cache flush[%d] to component %s failed (%d)\n", 3825 raidPtr->raidid, i, raidPtr->Disks[c].devname, e); 3826 } 3827 return e; 3828 } 3829 3830 int 3831 rf_sync_component_caches(RF_Raid_t *raidPtr, int force) 3832 { 3833 int c, error; 3834 3835 error = 0; 3836 for (c = 0; c < raidPtr->numCol; c++) { 3837 if (raidPtr->Disks[c].status == rf_ds_optimal) { 3838 int e = rf_sync_component_cache(raidPtr, c, force); 3839 if (e && !error) 3840 error = e; 3841 } 3842 } 3843 3844 for (c = 0; c < raidPtr->numSpare ; c++) { 3845 int sparecol = raidPtr->numCol + c; 3846 /* Need to ensure that the reconstruct actually completed! */ 3847 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3848 int e = rf_sync_component_cache(raidPtr, sparecol, 3849 force); 3850 if (e && !error) 3851 error = e; 3852 } 3853 } 3854 return error; 3855 } 3856 3857 /* Fill in info with the current status */ 3858 void 3859 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info) 3860 { 3861 3862 memset(info, 0, sizeof(*info)); 3863 3864 if (raidPtr->status != rf_rs_reconstructing) { 3865 info->total = 100; 3866 info->completed = 100; 3867 } else { 3868 info->total = raidPtr->reconControl->numRUsTotal; 3869 info->completed = raidPtr->reconControl->numRUsComplete; 3870 } 3871 info->remaining = info->total - info->completed; 3872 } 3873 3874 /* Fill in info with the current status */ 3875 void 3876 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info) 3877 { 3878 3879 memset(info, 0, sizeof(*info)); 3880 3881 if (raidPtr->parity_rewrite_in_progress == 1) { 3882 info->total = raidPtr->Layout.numStripe; 3883 info->completed = raidPtr->parity_rewrite_stripes_done; 3884 } else { 3885 info->completed = 100; 3886 info->total = 100; 3887 } 3888 info->remaining = info->total - info->completed; 3889 } 3890 3891 /* Fill in info with the current status */ 3892 void 3893 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info) 3894 { 3895 3896 memset(info, 0, sizeof(*info)); 3897 3898 if (raidPtr->copyback_in_progress == 1) { 3899 info->total = raidPtr->Layout.numStripe; 3900 info->completed = raidPtr->copyback_stripes_done; 3901 info->remaining = info->total - info->completed; 3902 } else { 3903 info->remaining = 0; 3904 info->completed = 100; 3905 info->total = 100; 3906 } 3907 } 3908 3909 /* Fill in config with the current info */ 3910 int 3911 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config) 3912 { 
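	/* Snapshot the column and spare disk structures into the
	   userland-visible config; note the rf_ds_rebuilding_spare
	   fixup for raidctl(8) below. */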
3913 int d, i, j; 3914 3915 if (!raidPtr->valid) 3916 return ENODEV; 3917 config->cols = raidPtr->numCol; 3918 config->ndevs = raidPtr->numCol; 3919 if (config->ndevs >= RF_MAX_DISKS) 3920 return ENOMEM; 3921 config->nspares = raidPtr->numSpare; 3922 if (config->nspares >= RF_MAX_DISKS) 3923 return ENOMEM; 3924 config->maxqdepth = raidPtr->maxQueueDepth; 3925 d = 0; 3926 for (j = 0; j < config->cols; j++) { 3927 config->devs[d] = raidPtr->Disks[j]; 3928 d++; 3929 } 3930 for (j = config->cols, i = 0; i < config->nspares; i++, j++) { 3931 config->spares[i] = raidPtr->Disks[j]; 3932 if (config->spares[i].status == rf_ds_rebuilding_spare) { 3933 /* XXX: raidctl(8) expects to see this as a used spare */ 3934 config->spares[i].status = rf_ds_used_spare; 3935 } 3936 } 3937 return 0; 3938 } 3939 3940 int 3941 rf_get_component_label(RF_Raid_t *raidPtr, void *data) 3942 { 3943 RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data; 3944 RF_ComponentLabel_t *raid_clabel; 3945 int column = clabel->column; 3946 3947 if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare)) 3948 return EINVAL; 3949 raid_clabel = raidget_component_label(raidPtr, column); 3950 memcpy(clabel, raid_clabel, sizeof *clabel); 3951 /* Fix-up for userland. */ 3952 if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) 3953 clabel->version = RF_COMPONENT_LABEL_VERSION; 3954 3955 return 0; 3956 } 3957 3958 /* 3959 * Module interface 3960 */ 3961 3962 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs"); 3963 3964 #ifdef _MODULE 3965 CFDRIVER_DECL(raid, DV_DISK, NULL); 3966 #endif 3967 3968 static int raid_modcmd(modcmd_t, void *); 3969 static int raid_modcmd_init(void); 3970 static int raid_modcmd_fini(void); 3971 3972 static int 3973 raid_modcmd(modcmd_t cmd, void *data) 3974 { 3975 int error; 3976 3977 error = 0; 3978 switch (cmd) { 3979 case MODULE_CMD_INIT: 3980 error = raid_modcmd_init(); 3981 break; 3982 case MODULE_CMD_FINI: 3983 error = raid_modcmd_fini(); 3984 break; 3985 default: 3986 error = ENOTTY; 3987 break; 3988 } 3989 return error; 3990 } 3991 3992 static int 3993 raid_modcmd_init(void) 3994 { 3995 int error; 3996 int bmajor, cmajor; 3997 3998 mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE); 3999 mutex_enter(&raid_lock); 4000 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 4001 rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM); 4002 rf_init_cond2(rf_sparet_wait_cv, "sparetw"); 4003 rf_init_cond2(rf_sparet_resp_cv, "rfgst"); 4004 4005 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; 4006 #endif 4007 4008 bmajor = cmajor = -1; 4009 error = devsw_attach("raid", &raid_bdevsw, &bmajor, 4010 &raid_cdevsw, &cmajor); 4011 if (error != 0 && error != EEXIST) { 4012 aprint_error("%s: devsw_attach failed %d\n", __func__, error); 4013 mutex_exit(&raid_lock); 4014 return error; 4015 } 4016 #ifdef _MODULE 4017 error = config_cfdriver_attach(&raid_cd); 4018 if (error != 0) { 4019 aprint_error("%s: config_cfdriver_attach failed %d\n", 4020 __func__, error); 4021 devsw_detach(&raid_bdevsw, &raid_cdevsw); 4022 mutex_exit(&raid_lock); 4023 return error; 4024 } 4025 #endif 4026 error = config_cfattach_attach(raid_cd.cd_name, &raid_ca); 4027 if (error != 0) { 4028 aprint_error("%s: config_cfattach_attach failed %d\n", 4029 __func__, error); 4030 #ifdef _MODULE 4031 config_cfdriver_detach(&raid_cd); 4032 #endif 4033 devsw_detach(&raid_bdevsw, &raid_cdevsw); 4034 mutex_exit(&raid_lock); 4035 return error; 4036 } 4037 4038 raidautoconfigdone = false; 4039 4040 mutex_exit(&raid_lock); 4041 4042 if (error == 0) { 4043 if 
(rf_BootRaidframe(true) == 0) 4044 aprint_verbose("Kernelized RAIDframe activated\n"); 4045 else 4046 panic("Serious error activating RAID!!"); 4047 } 4048 4049 /* 4050 * Register a finalizer which will be used to auto-config RAID 4051 * sets once all real hardware devices have been found. 4052 */ 4053 error = config_finalize_register(NULL, rf_autoconfig); 4054 if (error != 0) { 4055 aprint_error("WARNING: unable to register RAIDframe " 4056 "finalizer\n"); 4057 error = 0; 4058 } 4059 4060 return error; 4061 } 4062 4063 static int 4064 raid_modcmd_fini(void) 4065 { 4066 int error; 4067 4068 mutex_enter(&raid_lock); 4069 4070 /* Don't allow unload if raid device(s) exist. */ 4071 if (!LIST_EMPTY(&raids)) { 4072 mutex_exit(&raid_lock); 4073 return EBUSY; 4074 } 4075 4076 error = config_cfattach_detach(raid_cd.cd_name, &raid_ca); 4077 if (error != 0) { 4078 aprint_error("%s: cannot detach cfattach\n",__func__); 4079 mutex_exit(&raid_lock); 4080 return error; 4081 } 4082 #ifdef _MODULE 4083 error = config_cfdriver_detach(&raid_cd); 4084 if (error != 0) { 4085 aprint_error("%s: cannot detach cfdriver\n",__func__); 4086 config_cfattach_attach(raid_cd.cd_name, &raid_ca); 4087 mutex_exit(&raid_lock); 4088 return error; 4089 } 4090 #endif 4091 error = devsw_detach(&raid_bdevsw, &raid_cdevsw); 4092 if (error != 0) { 4093 aprint_error("%s: cannot detach devsw\n",__func__); 4094 #ifdef _MODULE 4095 config_cfdriver_attach(&raid_cd); 4096 #endif 4097 config_cfattach_attach(raid_cd.cd_name, &raid_ca); 4098 mutex_exit(&raid_lock); 4099 return error; 4100 } 4101 rf_BootRaidframe(false); 4102 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 4103 rf_destroy_mutex2(rf_sparet_wait_mutex); 4104 rf_destroy_cond2(rf_sparet_wait_cv); 4105 rf_destroy_cond2(rf_sparet_resp_cv); 4106 #endif 4107 mutex_exit(&raid_lock); 4108 mutex_destroy(&raid_lock); 4109 4110 return error; 4111 } 4112