1 /* $NetBSD: rf_netbsdkintf.c,v 1.418 2025/01/08 08:25:36 andvar Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Greg Oster; Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 
64 * 65 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 66 * 67 * @(#)cd.c 8.2 (Berkeley) 11/16/93 68 */ 69 70 /* 71 * Copyright (c) 1995 Carnegie-Mellon University. 72 * All rights reserved. 73 * 74 * Authors: Mark Holland, Jim Zelenka 75 * 76 * Permission to use, copy, modify and distribute this software and 77 * its documentation is hereby granted, provided that both the copyright 78 * notice and this permission notice appear in all copies of the 79 * software, derivative works or modified versions, and any portions 80 * thereof, and that both notices appear in supporting documentation. 81 * 82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 85 * 86 * Carnegie Mellon requests users of this software to return to 87 * 88 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 89 * School of Computer Science 90 * Carnegie Mellon University 91 * Pittsburgh PA 15213-3890 92 * 93 * any improvements or extensions that they make and grant Carnegie the 94 * rights to redistribute these changes. 95 */ 96 97 /*********************************************************** 98 * 99 * rf_kintf.c -- the kernel interface routines for RAIDframe 100 * 101 ***********************************************************/ 102 103 #include <sys/cdefs.h> 104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.418 2025/01/08 08:25:36 andvar Exp $"); 105 106 #ifdef _KERNEL_OPT 107 #include "opt_raid_autoconfig.h" 108 #include "opt_compat_netbsd32.h" 109 #endif 110 111 #include <sys/param.h> 112 #include <sys/errno.h> 113 #include <sys/pool.h> 114 #include <sys/proc.h> 115 #include <sys/queue.h> 116 #include <sys/disk.h> 117 #include <sys/device.h> 118 #include <sys/stat.h> 119 #include <sys/ioctl.h> 120 #include <sys/fcntl.h> 121 #include <sys/systm.h> 122 #include <sys/vnode.h> 123 #include <sys/disklabel.h> 124 #include <sys/conf.h> 125 #include <sys/buf.h> 126 #include <sys/bufq.h> 127 #include <sys/reboot.h> 128 #include <sys/kauth.h> 129 #include <sys/module.h> 130 #include <sys/compat_stub.h> 131 132 #include <prop/proplib.h> 133 134 #include <dev/raidframe/raidframevar.h> 135 #include <dev/raidframe/raidframeio.h> 136 #include <dev/raidframe/rf_paritymap.h> 137 138 #include "rf_raid.h" 139 #include "rf_dag.h" 140 #include "rf_dagflags.h" 141 #include "rf_desc.h" 142 #include "rf_diskqueue.h" 143 #include "rf_etimer.h" 144 #include "rf_general.h" 145 #include "rf_kintf.h" 146 #include "rf_options.h" 147 #include "rf_driver.h" 148 #include "rf_parityscan.h" 149 #include "rf_threadstuff.h" 150 151 #include "ioconf.h" 152 153 #ifdef DEBUG 154 int rf_kdebug_level = 0; 155 #define db1_printf(a) if (rf_kdebug_level > 0) printf a 156 #else /* DEBUG */ 157 #define db1_printf(a) { } 158 #endif /* DEBUG */ 159 160 #define DEVICE_XNAME(dev) dev ? 
device_xname(dev) : "null" 161 162 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 163 static rf_declare_mutex2(rf_sparet_wait_mutex); 164 static rf_declare_cond2(rf_sparet_wait_cv); 165 static rf_declare_cond2(rf_sparet_resp_cv); 166 167 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a 168 * spare table */ 169 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from 170 * installation process */ 171 #endif 172 173 const int rf_b_pass = (B_PHYS|B_RAW|B_MEDIA_FLAGS); 174 175 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures"); 176 177 /* prototypes */ 178 static void KernelWakeupFunc(struct buf *); 179 static void InitBP(struct buf *, struct vnode *, unsigned, 180 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *), 181 void *, int); 182 static void raidinit(struct raid_softc *); 183 static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp); 184 static int rf_get_component_caches(RF_Raid_t *raidPtr, int *); 185 186 static int raid_match(device_t, cfdata_t, void *); 187 static void raid_attach(device_t, device_t, void *); 188 static int raid_detach(device_t, int); 189 190 static int raidread_component_area(dev_t, struct vnode *, void *, size_t, 191 daddr_t, daddr_t); 192 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t, 193 daddr_t, daddr_t); 194 195 static int raidwrite_component_label(unsigned, 196 dev_t, struct vnode *, RF_ComponentLabel_t *); 197 static int raidread_component_label(unsigned, 198 dev_t, struct vnode *, RF_ComponentLabel_t *); 199 200 static int raid_diskstart(device_t, struct buf *bp); 201 static int raid_dumpblocks(device_t, void *, daddr_t, int); 202 static int raid_lastclose(device_t); 203 204 static dev_type_open(raidopen); 205 static dev_type_close(raidclose); 206 static dev_type_read(raidread); 207 static dev_type_write(raidwrite); 208 static dev_type_ioctl(raidioctl); 209 static dev_type_strategy(raidstrategy); 210 static dev_type_dump(raiddump); 211 static dev_type_size(raidsize); 212 213 const struct bdevsw raid_bdevsw = { 214 .d_open = raidopen, 215 .d_close = raidclose, 216 .d_strategy = raidstrategy, 217 .d_ioctl = raidioctl, 218 .d_dump = raiddump, 219 .d_psize = raidsize, 220 .d_discard = nodiscard, 221 .d_flag = D_DISK 222 }; 223 224 const struct cdevsw raid_cdevsw = { 225 .d_open = raidopen, 226 .d_close = raidclose, 227 .d_read = raidread, 228 .d_write = raidwrite, 229 .d_ioctl = raidioctl, 230 .d_stop = nostop, 231 .d_tty = notty, 232 .d_poll = nopoll, 233 .d_mmap = nommap, 234 .d_kqfilter = nokqfilter, 235 .d_discard = nodiscard, 236 .d_flag = D_DISK 237 }; 238 239 static struct dkdriver rf_dkdriver = { 240 .d_open = raidopen, 241 .d_close = raidclose, 242 .d_strategy = raidstrategy, 243 .d_diskstart = raid_diskstart, 244 .d_dumpblocks = raid_dumpblocks, 245 .d_lastclose = raid_lastclose, 246 .d_minphys = minphys 247 }; 248 249 #define raidunit(x) DISKUNIT(x) 250 #define raidsoftc(dev) (((struct raid_softc *)device_private(dev))->sc_r.softc) 251 252 extern struct cfdriver raid_cd; 253 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc), 254 raid_match, raid_attach, raid_detach, NULL, NULL, NULL, 255 DVF_DETACH_SHUTDOWN); 256 257 /* Internal representation of a rf_recon_req */ 258 struct rf_recon_req_internal { 259 RF_RowCol_t col; 260 RF_ReconReqFlags_t flags; 261 void *raidPtr; 262 }; 263 264 /* 265 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device. 
266 * Be aware that large numbers can allow the driver to consume a lot of 267 * kernel memory, especially on writes, and in degraded mode reads. 268 * 269 * For example: with a stripe width of 64 blocks (32k) and 5 disks, 270 * a single 64K write will typically require 64K for the old data, 271 * 64K for the old parity, and 64K for the new parity, for a total 272 * of 192K (if the parity buffer is not re-used immediately). 273 * Even if it is used immediately, that's still 128K, which when multiplied 274 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data. 275 * 276 * Now in degraded mode, for example, a 64K read on the above setup may 277 * require data reconstruction, which will require *all* of the 4 remaining 278 * disks to participate -- 4 * 32K/disk == 128K again. 279 */ 280 281 #ifndef RAIDOUTSTANDING 282 #define RAIDOUTSTANDING 6 283 #endif 284 285 #define RAIDLABELDEV(dev) \ 286 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART)) 287 288 /* declared here, and made public, for the benefit of KVM stuff.. */ 289 290 static int raidlock(struct raid_softc *); 291 static void raidunlock(struct raid_softc *); 292 293 static int raid_detach_unlocked(struct raid_softc *); 294 295 static void rf_markalldirty(RF_Raid_t *); 296 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *); 297 298 static void rf_ReconThread(struct rf_recon_req_internal *); 299 static void rf_RewriteParityThread(RF_Raid_t *raidPtr); 300 static void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *); 301 static int rf_autoconfig(device_t); 302 static int rf_rescan(void); 303 static void rf_buildroothack(RF_ConfigSet_t *); 304 305 static RF_AutoConfig_t *rf_find_raid_components(void); 306 static RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *); 307 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *); 308 static void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *); 309 static int rf_set_autoconfig(RF_Raid_t *, int); 310 static int rf_set_rootpartition(RF_Raid_t *, int); 311 static void rf_release_all_vps(RF_ConfigSet_t *); 312 static void rf_cleanup_config_set(RF_ConfigSet_t *); 313 static int rf_have_enough_components(RF_ConfigSet_t *); 314 static struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *); 315 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t); 316 317 /* 318 * Debugging, mostly. Set to 0 to not allow autoconfig to take place. 319 * Note that this is overridden by having RAID_AUTOCONFIG as an option 320 * in the kernel config file.
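 * For example, adding ``options RAID_AUTOCONFIG'' to the kernel config file forces raidautoconfig to 1 below, regardless of this default.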
321 */ 322 #ifdef RAID_AUTOCONFIG 323 int raidautoconfig = 1; 324 #else 325 int raidautoconfig = 0; 326 #endif 327 static bool raidautoconfigdone = false; 328 329 struct pool rf_alloclist_pool; /* AllocList */ 330 331 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids); 332 static kmutex_t raid_lock; 333 334 static struct raid_softc * 335 raidcreate(int unit) { 336 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP); 337 sc->sc_unit = unit; 338 cv_init(&sc->sc_cv, "raidunit"); 339 mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE); 340 return sc; 341 } 342 343 static void 344 raiddestroy(struct raid_softc *sc) { 345 cv_destroy(&sc->sc_cv); 346 mutex_destroy(&sc->sc_mutex); 347 kmem_free(sc, sizeof(*sc)); 348 } 349 350 static struct raid_softc * 351 raidget(int unit, bool create) { 352 struct raid_softc *sc; 353 if (unit < 0) { 354 #ifdef DIAGNOSTIC 355 panic("%s: unit %d!", __func__, unit); 356 #endif 357 return NULL; 358 } 359 mutex_enter(&raid_lock); 360 LIST_FOREACH(sc, &raids, sc_link) { 361 if (sc->sc_unit == unit) { 362 mutex_exit(&raid_lock); 363 return sc; 364 } 365 } 366 mutex_exit(&raid_lock); 367 if (!create) 368 return NULL; 369 sc = raidcreate(unit); 370 mutex_enter(&raid_lock); 371 LIST_INSERT_HEAD(&raids, sc, sc_link); 372 mutex_exit(&raid_lock); 373 return sc; 374 } 375 376 static void 377 raidput(struct raid_softc *sc) { 378 mutex_enter(&raid_lock); 379 LIST_REMOVE(sc, sc_link); 380 mutex_exit(&raid_lock); 381 raiddestroy(sc); 382 } 383 384 void 385 raidattach(int num) 386 { 387 388 /* 389 * Device attachment and associated initialization now occurs 390 * as part of the module initialization. 391 */ 392 } 393 394 static int 395 rf_autoconfig(device_t self) 396 { 397 RF_AutoConfig_t *ac_list; 398 RF_ConfigSet_t *config_sets; 399 400 if (!raidautoconfig || raidautoconfigdone == true) 401 return 0; 402 403 /* XXX This code can only be run once. */ 404 raidautoconfigdone = true; 405 406 #ifdef __HAVE_CPU_BOOTCONF 407 /* 408 * 0. find the boot device if needed first so we can use it later 409 * this needs to be done before we autoconfigure any raid sets, 410 * because if we use wedges we are not going to be able to open 411 * the boot device later 412 */ 413 if (booted_device == NULL) 414 cpu_bootconf(); 415 #endif 416 /* 1. locate all RAID components on the system */ 417 aprint_debug("Searching for RAID components...\n"); 418 ac_list = rf_find_raid_components(); 419 420 /* 2. Sort them into their respective sets. */ 421 config_sets = rf_create_auto_sets(ac_list); 422 423 /* 424 * 3. Evaluate each set and configure the valid ones. 425 * This gets done in rf_buildroothack(). 426 */ 427 rf_buildroothack(config_sets); 428 429 return 1; 430 } 431 432 int 433 rf_inited(const struct raid_softc *rs) { 434 return (rs->sc_flags & RAIDF_INITED) != 0; 435 } 436 437 RF_Raid_t * 438 rf_get_raid(struct raid_softc *rs) { 439 return &rs->sc_r; 440 } 441 442 int 443 rf_get_unit(const struct raid_softc *rs) { 444 return rs->sc_unit; 445 } 446 447 static int 448 rf_containsboot(RF_Raid_t *r, device_t bdv) { 449 const char *bootname; 450 size_t len; 451 452 /* if bdv is NULL, the set can't contain it. exit early. 
*/ 453 if (bdv == NULL) 454 return 0; 455 456 bootname = device_xname(bdv); 457 len = strlen(bootname); 458 459 for (int col = 0; col < r->numCol; col++) { 460 const char *devname = r->Disks[col].devname; 461 devname += sizeof("/dev/") - 1; 462 if (strncmp(devname, "dk", 2) == 0) { 463 const char *parent = 464 dkwedge_get_parent_name(r->Disks[col].dev); 465 if (parent != NULL) 466 devname = parent; 467 } 468 if (strncmp(devname, bootname, len) == 0) { 469 struct raid_softc *sc = r->softc; 470 aprint_debug("raid%d includes boot device %s\n", 471 sc->sc_unit, devname); 472 return 1; 473 } 474 } 475 return 0; 476 } 477 478 static int 479 rf_rescan(void) 480 { 481 RF_AutoConfig_t *ac_list; 482 RF_ConfigSet_t *config_sets, *cset, *next_cset; 483 struct raid_softc *sc; 484 int raid_added; 485 486 ac_list = rf_find_raid_components(); 487 config_sets = rf_create_auto_sets(ac_list); 488 489 raid_added = 1; 490 while (raid_added > 0) { 491 raid_added = 0; 492 cset = config_sets; 493 while (cset != NULL) { 494 next_cset = cset->next; 495 if (rf_have_enough_components(cset) && 496 cset->ac->clabel->autoconfigure == 1) { 497 sc = rf_auto_config_set(cset); 498 if (sc != NULL) { 499 aprint_debug("raid%d: configured ok, rootable %d\n", 500 sc->sc_unit, cset->rootable); 501 /* We added one RAID set */ 502 raid_added++; 503 } else { 504 /* The autoconfig didn't work :( */ 505 aprint_debug("Autoconfig failed\n"); 506 rf_release_all_vps(cset); 507 } 508 } else { 509 /* we're not autoconfiguring this set... 510 release the associated resources */ 511 rf_release_all_vps(cset); 512 } 513 /* cleanup */ 514 rf_cleanup_config_set(cset); 515 cset = next_cset; 516 } 517 if (raid_added > 0) { 518 /* We added at least one RAID set, so re-scan for recursive RAID */ 519 ac_list = rf_find_raid_components(); 520 config_sets = rf_create_auto_sets(ac_list); 521 } 522 } 523 524 return 0; 525 } 526 527 /* 528 * Example setup: 529 * dk1 at wd0: "raid@wd0", 171965 blocks at 32802, type: raidframe 530 * dk3 at wd1: "raid@wd1", 171965 blocks at 32802, type: raidframe 531 * raid1: Components: /dev/dk1 /dev/dk3 532 * dk4 at raid1: "empty@raid1", 8192 blocks at 34, type: msdos 533 * dk5 at raid1: "root@raid1", 163517 blocks at 8226, type: ffs 534 * 535 * If booted from wd0, booted_device will be 536 * disk wd0, startblk = 41092, nblks = 163517 537 * 538 * That is, dk5 with startblk computed from the beginning of wd0 539 * instead of beginning of raid1: 540 * 32802 + 64 (RF_PROTECTED_SECTORS) + 8226 = 41092 541 * 542 * In order to find the boot wedge, we must iterate on each component, 543 * find its offset from disk beginning, and look for the boot wedge with 544 * startblk adjusted.
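 * In code terms (a sketch of what rf_find_bootwedge() below computes): startblk = booted_startblk - dkw_offset - RF_PROTECTED_SECTORS, e.g. 41092 - 32802 - 64 = 8226, dk5's offset within raid1.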
545 */ 546 static device_t 547 rf_find_bootwedge(struct raid_softc *rsc) 548 { 549 RF_Raid_t *r = &rsc->sc_r; 550 const char *bootname; 551 size_t len; 552 device_t rdev = NULL; 553 554 if (booted_device == NULL) 555 goto out; 556 557 bootname = device_xname(booted_device); 558 len = strlen(bootname); 559 560 aprint_debug("%s: booted_device %s, startblk = %"PRId64", " 561 "nblks = %"PRId64"\n", __func__, 562 bootname, booted_startblk, booted_nblks); 563 564 for (int col = 0; col < r->numCol; col++) { 565 const char *devname = r->Disks[col].devname; 566 const char *parent; 567 struct disk *dk; 568 u_int nwedges; 569 struct dkwedge_info *dkwi; 570 struct dkwedge_list dkwl; 571 size_t dkwi_len; 572 int i; 573 574 devname += sizeof("/dev/") - 1; 575 if (strncmp(devname, "dk", 2) != 0) 576 continue; 577 578 parent = dkwedge_get_parent_name(r->Disks[col].dev); 579 if (parent == NULL) { 580 aprint_debug("%s: cannot find parent for " 581 "component /dev/%s", __func__, devname); 582 continue; 583 } 584 585 if (strncmp(parent, bootname, len) != 0) 586 continue; 587 588 aprint_debug("%s: looking up wedge %s in device %s\n", 589 __func__, devname, parent); 590 591 dk = disk_find(parent); 592 nwedges = dk->dk_nwedges; 593 dkwi_len = sizeof(*dkwi) * nwedges; 594 dkwi = RF_Malloc(dkwi_len); 595 596 dkwl.dkwl_buf = dkwi; 597 dkwl.dkwl_bufsize = dkwi_len; 598 dkwl.dkwl_nwedges = 0; 599 dkwl.dkwl_ncopied = 0; 600 601 if (dkwedge_list(dk, &dkwl, curlwp) == 0) { 602 daddr_t startblk; 603 604 for (i = 0; i < dkwl.dkwl_ncopied; i++) { 605 if (strcmp(dkwi[i].dkw_devname, devname) == 0) 606 break; 607 } 608 609 KASSERT(i < dkwl.dkwl_ncopied); 610 611 aprint_debug("%s: wedge %s, " 612 "startblk = %"PRId64", " 613 "nblks = %"PRId64"\n", 614 __func__, 615 dkwi[i].dkw_devname, 616 dkwi[i].dkw_offset, 617 dkwi[i].dkw_size); 618 619 startblk = booted_startblk 620 - dkwi[i].dkw_offset 621 - RF_PROTECTED_SECTORS; 622 623 aprint_debug("%s: looking for wedge in %s, " 624 "startblk = %"PRId64", " 625 "nblks = %"PRId64"\n", 626 __func__, 627 DEVICE_XNAME(rsc->sc_dksc.sc_dev), 628 startblk, booted_nblks); 629 630 rdev = dkwedge_find_partition(rsc->sc_dksc.sc_dev, 631 startblk, 632 booted_nblks); 633 if (rdev) { 634 aprint_debug("%s: root candidate wedge %s " 635 "shifted from %s\n", __func__, 636 device_xname(rdev), 637 dkwi[i].dkw_devname); 638 goto done; 639 } else { 640 aprint_debug("%s: not found\n", __func__); 641 } 642 } 643 644 aprint_debug("%s: nothing found for col %d\n", __func__, col); 645 done: 646 RF_Free(dkwi, dkwi_len); 647 } 648 649 out: 650 if (!rdev) 651 aprint_debug("%s: nothing found\n", __func__); 652 653 return rdev; 654 } 655 656 static void 657 rf_buildroothack(RF_ConfigSet_t *config_sets) 658 { 659 RF_AutoConfig_t *ac_list; 660 RF_ConfigSet_t *cset; 661 RF_ConfigSet_t *next_cset; 662 int num_root; 663 int raid_added; 664 struct raid_softc *sc, *rsc; 665 struct dk_softc *dksc = NULL; /* XXX gcc -Os: may be used uninit. 
*/ 666 667 sc = rsc = NULL; 668 num_root = 0; 669 670 raid_added = 1; 671 while (raid_added > 0) { 672 raid_added = 0; 673 cset = config_sets; 674 while (cset != NULL) { 675 next_cset = cset->next; 676 if (rf_have_enough_components(cset) && 677 cset->ac->clabel->autoconfigure == 1) { 678 sc = rf_auto_config_set(cset); 679 if (sc != NULL) { 680 aprint_debug("raid%d: configured ok, rootable %d\n", 681 sc->sc_unit, cset->rootable); 682 /* We added one RAID set */ 683 raid_added++; 684 if (cset->rootable) { 685 rsc = sc; 686 num_root++; 687 } 688 } else { 689 /* The autoconfig didn't work :( */ 690 aprint_debug("Autoconfig failed\n"); 691 rf_release_all_vps(cset); 692 } 693 } else { 694 /* we're not autoconfiguring this set... 695 release the associated resources */ 696 rf_release_all_vps(cset); 697 } 698 /* cleanup */ 699 rf_cleanup_config_set(cset); 700 cset = next_cset; 701 } 702 if (raid_added > 0) { 703 /* We added at least one RAID set, so re-scan for recursive RAID */ 704 ac_list = rf_find_raid_components(); 705 config_sets = rf_create_auto_sets(ac_list); 706 } 707 } 708 709 /* if the user has specified what the root device should be 710 then we don't touch booted_device or boothowto... */ 711 712 if (rootspec != NULL) { 713 aprint_debug("%s: rootspec %s\n", __func__, rootspec); 714 return; 715 } 716 717 /* we found something bootable... */ 718 if (num_root == 1) { 719 device_t candidate_root = NULL; 720 dksc = &rsc->sc_dksc; 721 722 if (dksc->sc_dkdev.dk_nwedges != 0) { 723 724 /* Find the wedge we booted from */ 725 candidate_root = rf_find_bootwedge(rsc); 726 727 /* Try first partition */ 728 if (candidate_root == NULL) { 729 size_t i = 0; 730 candidate_root = dkwedge_find_by_parent( 731 device_xname(dksc->sc_dev), &i); 732 } 733 aprint_debug("%s: candidate wedge root %s\n", 734 __func__, DEVICE_XNAME(candidate_root)); 735 } else { 736 candidate_root = dksc->sc_dev; 737 } 738 739 aprint_debug("%s: candidate root = %s, booted_device = %s, " 740 "root_partition = %d, contains_boot=%d\n", 741 __func__, DEVICE_XNAME(candidate_root), 742 DEVICE_XNAME(booted_device), rsc->sc_r.root_partition, 743 rf_containsboot(&rsc->sc_r, booted_device)); 744 745 /* XXX the check for booted_device == NULL can probably be 746 * dropped, now that rf_containsboot handles that case. 747 */ 748 if (booted_device == NULL || 749 rsc->sc_r.root_partition == 1 || 750 rf_containsboot(&rsc->sc_r, booted_device)) { 751 booted_device = candidate_root; 752 booted_method = "raidframe/single"; 753 booted_partition = 0; /* XXX assume 'a' */ 754 aprint_debug("%s: set booted_device = %s\n", __func__, 755 DEVICE_XNAME(booted_device)); 756 } 757 } else if (num_root > 1) { 758 aprint_debug("%s: many roots=%d, %s\n", __func__, num_root, 759 DEVICE_XNAME(booted_device)); 760 761 /* 762 * Maybe the MD code can help. 
If it cannot, then 763 * setroot() will discover that we have no 764 * booted_device and will ask the user if nothing was 765 * hardwired in the kernel config file 766 */ 767 if (booted_device == NULL) 768 return; 769 770 num_root = 0; 771 mutex_enter(&raid_lock); 772 LIST_FOREACH(sc, &raids, sc_link) { 773 RF_Raid_t *r = &sc->sc_r; 774 if (r->valid == 0) 775 continue; 776 777 if (r->root_partition == 0) 778 continue; 779 780 if (rf_containsboot(r, booted_device)) { 781 num_root++; 782 rsc = sc; 783 dksc = &rsc->sc_dksc; 784 } 785 } 786 mutex_exit(&raid_lock); 787 788 if (num_root == 1) { 789 booted_device = dksc->sc_dev; 790 booted_method = "raidframe/multi"; 791 booted_partition = 0; /* XXX assume 'a' */ 792 } else { 793 /* we can't guess.. require the user to answer... */ 794 boothowto |= RB_ASKNAME; 795 } 796 } 797 } 798 799 static int 800 raidsize(dev_t dev) 801 { 802 struct raid_softc *rs; 803 struct dk_softc *dksc; 804 unsigned int unit; 805 806 unit = raidunit(dev); 807 if ((rs = raidget(unit, false)) == NULL) 808 return -1; 809 dksc = &rs->sc_dksc; 810 811 if ((rs->sc_flags & RAIDF_INITED) == 0) 812 return -1; 813 814 return dk_size(dksc, dev); 815 } 816 817 static int 818 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size) 819 { 820 unsigned int unit; 821 struct raid_softc *rs; 822 struct dk_softc *dksc; 823 824 unit = raidunit(dev); 825 if ((rs = raidget(unit, false)) == NULL) 826 return ENXIO; 827 dksc = &rs->sc_dksc; 828 829 if ((rs->sc_flags & RAIDF_INITED) == 0) 830 return ENODEV; 831 832 /* 833 Note that blkno is relative to this particular partition. 834 By adding RF_PROTECTED_SECTORS, we get a value that 835 is relative to the partition used for the underlying component. 836 */ 837 blkno += RF_PROTECTED_SECTORS; 838 839 return dk_dump(dksc, dev, blkno, va, size, DK_DUMP_RECURSIVE); 840 } 841 842 static int 843 raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk) 844 { 845 struct raid_softc *rs = raidsoftc(dev); 846 const struct bdevsw *bdev; 847 RF_Raid_t *raidPtr; 848 int c, sparecol, j, scol, dumpto; 849 int error = 0; 850 851 raidPtr = &rs->sc_r; 852 853 /* we only support dumping to RAID 1 sets */ 854 if (raidPtr->Layout.numDataCol != 1 || 855 raidPtr->Layout.numParityCol != 1) 856 return EINVAL; 857 858 if ((error = raidlock(rs)) != 0) 859 return error; 860 861 /* figure out what device is alive.. */ 862 863 /* 864 Look for a component to dump to. The preference for the 865 component to dump to is as follows: 866 1) the first component 867 2) a used_spare of the first component 868 3) the second component 869 4) a used_spare of the second component 870 */ 871 872 dumpto = -1; 873 for (c = 0; c < raidPtr->numCol; c++) { 874 if (raidPtr->Disks[c].status == rf_ds_optimal) { 875 /* this might be the one */ 876 dumpto = c; 877 break; 878 } 879 } 880 881 /* 882 At this point we have possibly selected a live component. 883 If we didn't find a live component, we now check to see 884 if there is a relevant spared component. 885 */ 886 887 for (c = 0; c < raidPtr->numSpare; c++) { 888 sparecol = raidPtr->numCol + c; 889 890 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 891 /* How about this one? */ 892 scol = -1; 893 for(j=0;j<raidPtr->numCol;j++) { 894 if (raidPtr->Disks[j].spareCol == sparecol) { 895 scol = j; 896 break; 897 } 898 } 899 if (scol == 0) { 900 /* 901 We must have found a spared first 902 component! We'll take that over 903 anything else found so far.
(We 904 couldn't have found a real first 905 component before, since this is a 906 used spare, and it's saying that 907 it's replacing the first 908 component.) On reboot (with 909 autoconfiguration turned on) 910 sparecol will become the first 911 component (component0) of this set. 912 */ 913 dumpto = sparecol; 914 break; 915 } else if (scol != -1) { 916 /* 917 Must be a spared second component. 918 We'll dump to that if we haven't found 919 anything else so far. 920 */ 921 if (dumpto == -1) 922 dumpto = sparecol; 923 } 924 } 925 } 926 927 if (dumpto == -1) { 928 /* we couldn't find any live components to dump to!?!? 929 */ 930 error = EINVAL; 931 goto out; 932 } 933 934 bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev); 935 if (bdev == NULL) { 936 error = ENXIO; 937 goto out; 938 } 939 940 error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev, 941 blkno, va, nblk * raidPtr->bytesPerSector); 942 943 out: 944 raidunlock(rs); 945 946 return error; 947 } 948 949 /* ARGSUSED */ 950 static int 951 raidopen(dev_t dev, int flags, int fmt, 952 struct lwp *l) 953 { 954 int unit = raidunit(dev); 955 struct raid_softc *rs; 956 struct dk_softc *dksc; 957 int error = 0; 958 int part, pmask; 959 960 if ((rs = raidget(unit, true)) == NULL) 961 return ENXIO; 962 if ((error = raidlock(rs)) != 0) 963 return error; 964 965 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) { 966 error = EBUSY; 967 goto bad; 968 } 969 970 dksc = &rs->sc_dksc; 971 972 part = DISKPART(dev); 973 pmask = (1 << part); 974 975 if (!DK_BUSY(dksc, pmask) && 976 ((rs->sc_flags & RAIDF_INITED) != 0)) { 977 /* First one... mark things as dirty... Note that we *MUST* 978 have done a configure before this. I DO NOT WANT TO BE 979 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED 980 THAT THEY BELONG TOGETHER!!!!! */ 981 /* XXX should check to see if we're only open for reading 982 here... If so, we needn't do this, but then need some 983 other way of keeping track of what's happened.. */ 984 985 rf_markalldirty(&rs->sc_r); 986 } 987 988 if ((rs->sc_flags & RAIDF_INITED) != 0) 989 error = dk_open(dksc, dev, flags, fmt, l); 990 991 bad: 992 raidunlock(rs); 993 994 return error; 995 996 997 } 998 999 static int 1000 raid_lastclose(device_t self) 1001 { 1002 struct raid_softc *rs = raidsoftc(self); 1003 1004 /* Last one... device is not unconfigured yet. 1005 Device shutdown has taken care of setting the 1006 clean bits if RAIDF_INITED is not set; 1007 mark things as clean...
*/ 1008 1009 rf_update_component_labels(&rs->sc_r, 1010 RF_FINAL_COMPONENT_UPDATE); 1011 1012 /* pass to unlocked code */ 1013 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) 1014 rs->sc_flags |= RAIDF_DETACH; 1015 1016 return 0; 1017 } 1018 1019 /* ARGSUSED */ 1020 static int 1021 raidclose(dev_t dev, int flags, int fmt, struct lwp *l) 1022 { 1023 int unit = raidunit(dev); 1024 struct raid_softc *rs; 1025 struct dk_softc *dksc; 1026 cfdata_t cf; 1027 int error = 0, do_detach = 0, do_put = 0; 1028 1029 if ((rs = raidget(unit, false)) == NULL) 1030 return ENXIO; 1031 dksc = &rs->sc_dksc; 1032 1033 if ((error = raidlock(rs)) != 0) 1034 return error; 1035 1036 if ((rs->sc_flags & RAIDF_INITED) != 0) { 1037 error = dk_close(dksc, dev, flags, fmt, l); 1038 if ((rs->sc_flags & RAIDF_DETACH) != 0) 1039 do_detach = 1; 1040 } else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) 1041 do_put = 1; 1042 1043 raidunlock(rs); 1044 1045 if (do_detach) { 1046 /* free the pseudo device attach bits */ 1047 cf = device_cfdata(dksc->sc_dev); 1048 error = config_detach(dksc->sc_dev, 0); 1049 if (error == 0) 1050 free(cf, M_RAIDFRAME); 1051 } else if (do_put) { 1052 raidput(rs); 1053 } 1054 1055 return error; 1056 1057 } 1058 1059 static void 1060 raid_wakeup(RF_Raid_t *raidPtr) 1061 { 1062 rf_lock_mutex2(raidPtr->iodone_lock); 1063 rf_signal_cond2(raidPtr->iodone_cv); 1064 rf_unlock_mutex2(raidPtr->iodone_lock); 1065 } 1066 1067 static void 1068 raidstrategy(struct buf *bp) 1069 { 1070 unsigned int unit; 1071 struct raid_softc *rs; 1072 struct dk_softc *dksc; 1073 RF_Raid_t *raidPtr; 1074 1075 unit = raidunit(bp->b_dev); 1076 if ((rs = raidget(unit, false)) == NULL) { 1077 bp->b_error = ENXIO; 1078 goto fail; 1079 } 1080 if ((rs->sc_flags & RAIDF_INITED) == 0) { 1081 bp->b_error = ENXIO; 1082 goto fail; 1083 } 1084 dksc = &rs->sc_dksc; 1085 raidPtr = &rs->sc_r; 1086 1087 /* Queue IO only */ 1088 if (dk_strategy_defer(dksc, bp)) 1089 goto done; 1090 1091 /* schedule the IO to happen at the next convenient time */ 1092 raid_wakeup(raidPtr); 1093 1094 done: 1095 return; 1096 1097 fail: 1098 bp->b_resid = bp->b_bcount; 1099 biodone(bp); 1100 } 1101 1102 static int 1103 raid_diskstart(device_t dev, struct buf *bp) 1104 { 1105 struct raid_softc *rs = raidsoftc(dev); 1106 RF_Raid_t *raidPtr; 1107 1108 raidPtr = &rs->sc_r; 1109 if (!raidPtr->valid) { 1110 db1_printf(("raid is not valid..\n")); 1111 return ENODEV; 1112 } 1113 1114 /* XXX */ 1115 bp->b_resid = 0; 1116 1117 return raiddoaccess(raidPtr, bp); 1118 } 1119 1120 void 1121 raiddone(RF_Raid_t *raidPtr, struct buf *bp) 1122 { 1123 struct raid_softc *rs; 1124 struct dk_softc *dksc; 1125 1126 rs = raidPtr->softc; 1127 dksc = &rs->sc_dksc; 1128 1129 dk_done(dksc, bp); 1130 1131 rf_lock_mutex2(raidPtr->mutex); 1132 raidPtr->openings++; 1133 rf_unlock_mutex2(raidPtr->mutex); 1134 1135 /* schedule more IO */ 1136 raid_wakeup(raidPtr); 1137 } 1138 1139 /* ARGSUSED */ 1140 static int 1141 raidread(dev_t dev, struct uio *uio, int flags) 1142 { 1143 int unit = raidunit(dev); 1144 struct raid_softc *rs; 1145 1146 if ((rs = raidget(unit, false)) == NULL) 1147 return ENXIO; 1148 1149 if ((rs->sc_flags & RAIDF_INITED) == 0) 1150 return ENXIO; 1151 1152 return physio(raidstrategy, NULL, dev, B_READ, minphys, uio); 1153 1154 } 1155 1156 /* ARGSUSED */ 1157 static int 1158 raidwrite(dev_t dev, struct uio *uio, int flags) 1159 { 1160 int unit = raidunit(dev); 1161 struct raid_softc *rs; 1162 1163 if ((rs = raidget(unit, false)) == NULL) 1164 return ENXIO; 1165 1166 if ((rs->sc_flags & 
RAIDF_INITED) == 0) 1167 return ENXIO; 1168 1169 return physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio); 1170 1171 } 1172 1173 static int 1174 raid_detach_unlocked(struct raid_softc *rs) 1175 { 1176 struct dk_softc *dksc = &rs->sc_dksc; 1177 RF_Raid_t *raidPtr; 1178 int error; 1179 1180 raidPtr = &rs->sc_r; 1181 1182 if (DK_BUSY(dksc, 0) || 1183 raidPtr->recon_in_progress != 0 || 1184 raidPtr->parity_rewrite_in_progress != 0) 1185 return EBUSY; 1186 1187 if ((rs->sc_flags & RAIDF_INITED) == 0) 1188 return 0; 1189 1190 rs->sc_flags &= ~RAIDF_SHUTDOWN; 1191 1192 if ((error = rf_Shutdown(raidPtr)) != 0) 1193 return error; 1194 1195 rs->sc_flags &= ~RAIDF_INITED; 1196 1197 /* Kill off any queued buffers */ 1198 dk_drain(dksc); 1199 bufq_free(dksc->sc_bufq); 1200 1201 /* Detach the disk. */ 1202 dkwedge_delall(&dksc->sc_dkdev); 1203 disk_detach(&dksc->sc_dkdev); 1204 disk_destroy(&dksc->sc_dkdev); 1205 dk_detach(dksc); 1206 1207 return 0; 1208 } 1209 1210 int 1211 rf_fail_disk(RF_Raid_t *raidPtr, struct rf_recon_req *rr) 1212 { 1213 struct rf_recon_req_internal *rrint; 1214 1215 if (raidPtr->Layout.map->faultsTolerated == 0) { 1216 /* Can't do this on a RAID 0!! */ 1217 return EINVAL; 1218 } 1219 1220 if (rr->col < 0 || rr->col >= raidPtr->numCol) { 1221 /* bad column */ 1222 return EINVAL; 1223 } 1224 1225 rf_lock_mutex2(raidPtr->mutex); 1226 if (raidPtr->status == rf_rs_reconstructing) { 1227 raidPtr->abortRecon[rr->col] = 1; 1228 } 1229 if ((raidPtr->Disks[rr->col].status == rf_ds_optimal) && 1230 (raidPtr->numFailures > 0)) { 1231 /* some other component has failed. Let's not make 1232 things worse. XXX wrong for RAID6 */ 1233 goto out; 1234 } 1235 if (raidPtr->Disks[rr->col].status == rf_ds_spared) { 1236 int spareCol = raidPtr->Disks[rr->col].spareCol; 1237 1238 if (spareCol < raidPtr->numCol || 1239 spareCol >= raidPtr->numCol + raidPtr->numSpare) 1240 goto out; 1241 1242 /* 1243 * Fail the spare disk so that we can 1244 * reconstruct on another one. 
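 * (The rf_ReconThread created at the end of this function performs the actual reconstruction.)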
1245 */ 1246 raidPtr->Disks[spareCol].status = rf_ds_failed; 1247 1248 } 1249 rf_unlock_mutex2(raidPtr->mutex); 1250 1251 /* make a copy of the recon request so that we don't rely on 1252 * the user's buffer */ 1253 rrint = RF_Malloc(sizeof(*rrint)); 1254 if (rrint == NULL) 1255 return(ENOMEM); 1256 rrint->col = rr->col; 1257 rrint->flags = rr->flags; 1258 rrint->raidPtr = raidPtr; 1259 1260 return RF_CREATE_THREAD(raidPtr->recon_thread, rf_ReconThread, 1261 rrint, "raid_recon"); 1262 out: 1263 rf_unlock_mutex2(raidPtr->mutex); 1264 return EINVAL; 1265 } 1266 1267 static int 1268 rf_copyinspecificbuf(RF_Config_t *k_cfg) 1269 { 1270 /* allocate a buffer for the layout-specific data, and copy it in */ 1271 if (k_cfg->layoutSpecificSize == 0) 1272 return 0; 1273 1274 if (k_cfg->layoutSpecificSize > 10000) { 1275 /* sanity check */ 1276 return EINVAL; 1277 } 1278 1279 u_char *specific_buf; 1280 specific_buf = RF_Malloc(k_cfg->layoutSpecificSize); 1281 if (specific_buf == NULL) 1282 return ENOMEM; 1283 1284 int retcode = copyin(k_cfg->layoutSpecific, specific_buf, 1285 k_cfg->layoutSpecificSize); 1286 if (retcode) { 1287 RF_Free(specific_buf, k_cfg->layoutSpecificSize); 1288 db1_printf(("%s: retcode=%d copyin.2\n", __func__, retcode)); 1289 return retcode; 1290 } 1291 1292 k_cfg->layoutSpecific = specific_buf; 1293 return 0; 1294 } 1295 1296 static int 1297 rf_getConfiguration(struct raid_softc *rs, void *data, RF_Config_t **k_cfg) 1298 { 1299 RF_Config_t *u_cfg = *((RF_Config_t **) data); 1300 1301 if (rs->sc_r.valid) { 1302 /* There is a valid RAID set running on this unit! */ 1303 printf("raid%d: Device already configured!\n", rs->sc_unit); 1304 return EINVAL; 1305 } 1306 1307 /* copy-in the configuration information */ 1308 /* data points to a pointer to the configuration structure */ 1309 *k_cfg = RF_Malloc(sizeof(**k_cfg)); 1310 if (*k_cfg == NULL) { 1311 return ENOMEM; 1312 } 1313 int retcode = copyin(u_cfg, *k_cfg, sizeof(RF_Config_t)); 1314 if (retcode == 0) 1315 return 0; 1316 RF_Free(*k_cfg, sizeof(RF_Config_t)); 1317 db1_printf(("%s: retcode=%d copyin.1\n", __func__, retcode)); 1318 rs->sc_flags |= RAIDF_SHUTDOWN; 1319 return retcode; 1320 } 1321 1322 int 1323 rf_construct(struct raid_softc *rs, RF_Config_t *k_cfg) 1324 { 1325 int retcode, i; 1326 RF_Raid_t *raidPtr = &rs->sc_r; 1327 1328 rs->sc_flags &= ~RAIDF_SHUTDOWN; 1329 1330 if ((retcode = rf_copyinspecificbuf(k_cfg)) != 0) 1331 goto out; 1332 1333 /* should do some kind of sanity check on the configuration. 1334 * Store the sum of all the bytes in the last byte? */ 1335 1336 /* Force nul-termination on all strings. */ 1337 #define ZERO_FINAL(s) do { s[sizeof(s) - 1] = '\0'; } while (0) 1338 for (i = 0; i < RF_MAXCOL; i++) { 1339 ZERO_FINAL(k_cfg->devnames[0][i]); 1340 } 1341 for (i = 0; i < RF_MAXSPARE; i++) { 1342 ZERO_FINAL(k_cfg->spare_names[i]); 1343 } 1344 for (i = 0; i < RF_MAXDBGV; i++) { 1345 ZERO_FINAL(k_cfg->debugVars[i]); 1346 } 1347 #undef ZERO_FINAL 1348 1349 /* Check some basic limits. 
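numCol must lie in [0, RF_MAXCOL) and numSpare in [0, RF_MAXSPARE); anything else is rejected with EINVAL below.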
*/ 1350 if (k_cfg->numCol >= RF_MAXCOL || k_cfg->numCol < 0) { 1351 retcode = EINVAL; 1352 goto out; 1353 } 1354 if (k_cfg->numSpare >= RF_MAXSPARE || k_cfg->numSpare < 0) { 1355 retcode = EINVAL; 1356 goto out; 1357 } 1358 1359 /* configure the system */ 1360 1361 /* 1362 * Clear the entire RAID descriptor, just to make sure 1363 * there is no stale data left in the case of a 1364 * reconfiguration 1365 */ 1366 memset(raidPtr, 0, sizeof(*raidPtr)); 1367 raidPtr->softc = rs; 1368 raidPtr->raidid = rs->sc_unit; 1369 1370 retcode = rf_Configure(raidPtr, k_cfg, NULL); 1371 1372 if (retcode == 0) { 1373 /* allow this many simultaneous IO's to 1374 this RAID device */ 1375 raidPtr->openings = RAIDOUTSTANDING; 1376 1377 raidinit(rs); 1378 raid_wakeup(raidPtr); 1379 rf_markalldirty(raidPtr); 1380 } 1381 1382 /* free the buffers. No return code here. */ 1383 if (k_cfg->layoutSpecificSize) { 1384 RF_Free(k_cfg->layoutSpecific, k_cfg->layoutSpecificSize); 1385 } 1386 out: 1387 RF_Free(k_cfg, sizeof(RF_Config_t)); 1388 if (retcode) { 1389 /* 1390 * If configuration failed, set sc_flags so that we 1391 * will detach the device when we close it. 1392 */ 1393 rs->sc_flags |= RAIDF_SHUTDOWN; 1394 } 1395 return retcode; 1396 } 1397 1398 #if RF_DISABLED 1399 static int 1400 rf_set_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel) 1401 { 1402 1403 /* XXX check the label for valid stuff... */ 1404 /* Note that some things *should not* get modified -- 1405 the user should be re-initing the labels instead of 1406 trying to patch things. 1407 */ 1408 #ifdef DEBUG 1409 int raidid = raidPtr->raidid; 1410 printf("raid%d: Got component label:\n", raidid); 1411 printf("raid%d: Version: %d\n", raidid, clabel->version); 1412 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number); 1413 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter); 1414 printf("raid%d: Column: %d\n", raidid, clabel->column); 1415 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns); 1416 printf("raid%d: Clean: %d\n", raidid, clabel->clean); 1417 printf("raid%d: Status: %d\n", raidid, clabel->status); 1418 #endif /* DEBUG */ 1419 clabel->row = 0; 1420 int column = clabel->column; 1421 1422 if ((column < 0) || (column >= raidPtr->numCol)) { 1423 return(EINVAL); 1424 } 1425 1426 /* XXX this isn't allowed to do anything for now :-) */ 1427 1428 /* XXX and before it is, we need to fill in the rest 1429 of the fields!?!?!?! */ 1430 memcpy(raidget_component_label(raidPtr, column), 1431 clabel, sizeof(*clabel)); 1432 raidflush_component_label(raidPtr, column); 1433 return 0; 1434 } 1435 #endif 1436 1437 static int 1438 rf_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel) 1439 { 1440 /* 1441 we only want the serial number from 1442 the above. We get all the rest of the information 1443 from the config that was used to create this RAID 1444 set. 1445 */ 1446 1447 raidPtr->serial_number = clabel->serial_number; 1448 1449 for (int column = 0; column < raidPtr->numCol; column++) { 1450 RF_RaidDisk_t *diskPtr = &raidPtr->Disks[column]; 1451 if (RF_DEAD_DISK(diskPtr->status)) 1452 continue; 1453 RF_ComponentLabel_t *ci_label = raidget_component_label( 1454 raidPtr, column); 1455 /* Zeroing this is important. 
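Any fields that raid_init_component_label() does not fill in below would otherwise carry stale data into the on-disk label.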
*/ 1456 memset(ci_label, 0, sizeof(*ci_label)); 1457 raid_init_component_label(raidPtr, ci_label); 1458 ci_label->serial_number = raidPtr->serial_number; 1459 ci_label->row = 0; /* we don't pretend to support more */ 1460 rf_component_label_set_partitionsize(ci_label, 1461 diskPtr->partitionSize); 1462 ci_label->column = column; 1463 raidflush_component_label(raidPtr, column); 1464 /* XXXjld what about the spares? */ 1465 } 1466 1467 return 0; 1468 } 1469 1470 static int 1471 rf_rebuild_in_place(RF_Raid_t *raidPtr, RF_SingleComponent_t *componentPtr) 1472 { 1473 1474 if (raidPtr->Layout.map->faultsTolerated == 0) { 1475 /* Can't do this on a RAID 0!! */ 1476 return EINVAL; 1477 } 1478 1479 if (raidPtr->recon_in_progress == 1) { 1480 /* a reconstruct is already in progress! */ 1481 return EINVAL; 1482 } 1483 1484 RF_SingleComponent_t component; 1485 memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t)); 1486 component.row = 0; /* we don't support any more */ 1487 int column = component.column; 1488 1489 if ((column < 0) || (column >= raidPtr->numCol)) { 1490 return EINVAL; 1491 } 1492 1493 rf_lock_mutex2(raidPtr->mutex); 1494 if ((raidPtr->Disks[column].status == rf_ds_optimal) && 1495 (raidPtr->numFailures > 0)) { 1496 /* XXX 0 above shouldn't be constant!!! */ 1497 /* some component other than this has failed. 1498 Let's not make things worse than they already 1499 are... */ 1500 printf("raid%d: Unable to reconstruct to disk at:\n", 1501 raidPtr->raidid); 1502 printf("raid%d: Col: %d Too many failures.\n", 1503 raidPtr->raidid, column); 1504 rf_unlock_mutex2(raidPtr->mutex); 1505 return EINVAL; 1506 } 1507 1508 if (raidPtr->Disks[column].status == rf_ds_reconstructing) { 1509 printf("raid%d: Unable to reconstruct to disk at:\n", 1510 raidPtr->raidid); 1511 printf("raid%d: Col: %d " 1512 "Reconstruction already occurring!\n", 1513 raidPtr->raidid, column); 1514 1515 rf_unlock_mutex2(raidPtr->mutex); 1516 return EINVAL; 1517 } 1518 1519 if (raidPtr->Disks[column].status == rf_ds_spared) { 1520 rf_unlock_mutex2(raidPtr->mutex); 1521 return EINVAL; 1522 } 1523 1524 rf_unlock_mutex2(raidPtr->mutex); 1525 1526 struct rf_recon_req_internal *rrint; 1527 rrint = RF_Malloc(sizeof(*rrint)); 1528 if (rrint == NULL) 1529 return ENOMEM; 1530 1531 rrint->col = column; 1532 rrint->raidPtr = raidPtr; 1533 1534 return RF_CREATE_THREAD(raidPtr->recon_thread, 1535 rf_ReconstructInPlaceThread, rrint, "raid_reconip"); 1536 } 1537 1538 static int 1539 rf_check_recon_status(RF_Raid_t *raidPtr, int *data) 1540 { 1541 /* 1542 * This makes no sense on a RAID 0, or if we are not reconstructing, 1543 * so tell the user it's done. 1544 */ 1545 if (raidPtr->Layout.map->faultsTolerated == 0 || 1546 raidPtr->status != rf_rs_reconstructing) { 1547 *data = 100; 1548 return 0; 1549 } 1550 if (raidPtr->reconControl->numRUsTotal == 0) { 1551 *data = 0; 1552 return 0; 1553 } 1554 *data = (raidPtr->reconControl->numRUsComplete * 100 1555 / raidPtr->reconControl->numRUsTotal); 1556 return 0; 1557 } 1558 1559 /* 1560 * Copy a RF_SingleComponent_t from 'data', ensuring nul-termination 1561 * on the component_name[] array.
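 * The name arrives from userland via ioctl, so it cannot be trusted to be terminated.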
1562 */ 1563 static void 1564 rf_copy_single_component(RF_SingleComponent_t *component, void *data) 1565 { 1566 1567 memcpy(component, data, sizeof *component); 1568 component->component_name[sizeof(component->component_name) - 1] = '\0'; 1569 } 1570 1571 static int 1572 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 1573 { 1574 int unit = raidunit(dev); 1575 int part, pmask; 1576 struct raid_softc *rs; 1577 struct dk_softc *dksc; 1578 RF_Config_t *k_cfg; 1579 RF_Raid_t *raidPtr; 1580 RF_AccTotals_t *totals; 1581 RF_SingleComponent_t component; 1582 RF_DeviceConfig_t *d_cfg, *ucfgp; 1583 int retcode = 0; 1584 int column; 1585 RF_ComponentLabel_t *clabel; 1586 int d; 1587 1588 if ((rs = raidget(unit, false)) == NULL) 1589 return ENXIO; 1590 1591 dksc = &rs->sc_dksc; 1592 raidPtr = &rs->sc_r; 1593 1594 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev, 1595 (int) DISKPART(dev), (int) unit, cmd)); 1596 1597 /* Only CONFIGURE and RESCAN can be done without the RAID being initialized. */ 1598 switch (cmd) { 1599 case RAIDFRAME_CONFIGURE: 1600 case RAIDFRAME_RESCAN: 1601 break; 1602 default: 1603 if (!rf_inited(rs)) 1604 return ENXIO; 1605 } 1606 1607 switch (cmd) { 1608 /* configure the system */ 1609 case RAIDFRAME_CONFIGURE: 1610 if ((retcode = rf_getConfiguration(rs, data, &k_cfg)) != 0) 1611 return retcode; 1612 return rf_construct(rs, k_cfg); 1613 1614 /* shutdown the system */ 1615 case RAIDFRAME_SHUTDOWN: 1616 1617 part = DISKPART(dev); 1618 pmask = (1 << part); 1619 1620 if ((retcode = raidlock(rs)) != 0) 1621 return retcode; 1622 1623 if (DK_BUSY(dksc, pmask) || 1624 raidPtr->recon_in_progress != 0 || 1625 raidPtr->parity_rewrite_in_progress != 0) 1626 retcode = EBUSY; 1627 else { 1628 /* detach and free on close */ 1629 rs->sc_flags |= RAIDF_SHUTDOWN; 1630 retcode = 0; 1631 } 1632 1633 raidunlock(rs); 1634 1635 return retcode; 1636 case RAIDFRAME_GET_COMPONENT_LABEL: 1637 return rf_get_component_label(raidPtr, data); 1638 1639 #if RF_DISABLED 1640 case RAIDFRAME_SET_COMPONENT_LABEL: 1641 return rf_set_component_label(raidPtr, data); 1642 #endif 1643 1644 case RAIDFRAME_INIT_LABELS: 1645 return rf_init_component_label(raidPtr, data); 1646 1647 case RAIDFRAME_SET_AUTOCONFIG: 1648 d = rf_set_autoconfig(raidPtr, *(int *) data); 1649 printf("raid%d: New autoconfig value is: %d\n", 1650 raidPtr->raidid, d); 1651 *(int *) data = d; 1652 return retcode; 1653 1654 case RAIDFRAME_SET_ROOT: 1655 d = rf_set_rootpartition(raidPtr, *(int *) data); 1656 printf("raid%d: New rootpartition value is: %d\n", 1657 raidPtr->raidid, d); 1658 *(int *) data = d; 1659 return retcode; 1660 1661 /* initialize all parity */ 1662 case RAIDFRAME_REWRITEPARITY: 1663 1664 if (raidPtr->Layout.map->faultsTolerated == 0) { 1665 /* Parity for RAID 0 is trivially correct */ 1666 raidPtr->parity_good = RF_RAID_CLEAN; 1667 return 0; 1668 } 1669 1670 if (raidPtr->parity_rewrite_in_progress == 1) { 1671 /* Re-write is already in progress! 
*/ 1672 return EINVAL; 1673 } 1674 1675 return RF_CREATE_THREAD(raidPtr->parity_rewrite_thread, 1676 rf_RewriteParityThread, raidPtr,"raid_parity"); 1677 1678 case RAIDFRAME_ADD_HOT_SPARE: 1679 rf_copy_single_component(&component, data); 1680 return rf_add_hot_spare(raidPtr, &component); 1681 1682 /* Remove a non hot-spare component, never implemented in userland */ 1683 case RAIDFRAME_DELETE_COMPONENT: 1684 rf_copy_single_component(&component, data); 1685 return rf_delete_component(raidPtr, &component); 1686 1687 case RAIDFRAME_REMOVE_COMPONENT: 1688 rf_copy_single_component(&component, data); 1689 return rf_remove_component(raidPtr, &component); 1690 1691 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1692 rf_copy_single_component(&component, data); 1693 return rf_incorporate_hot_spare(raidPtr, &component); 1694 1695 case RAIDFRAME_REBUILD_IN_PLACE: 1696 return rf_rebuild_in_place(raidPtr, data); 1697 1698 case RAIDFRAME_GET_INFO: 1699 ucfgp = *(RF_DeviceConfig_t **)data; 1700 d_cfg = RF_Malloc(sizeof(*d_cfg)); 1701 if (d_cfg == NULL) 1702 return ENOMEM; 1703 retcode = rf_get_info(raidPtr, d_cfg); 1704 if (retcode == 0) { 1705 retcode = copyout(d_cfg, ucfgp, sizeof(*d_cfg)); 1706 } 1707 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1708 return retcode; 1709 1710 case RAIDFRAME_CHECK_PARITY: 1711 *(int *) data = raidPtr->parity_good; 1712 return 0; 1713 1714 case RAIDFRAME_PARITYMAP_STATUS: 1715 if (rf_paritymap_ineligible(raidPtr)) 1716 return EINVAL; 1717 rf_paritymap_status(raidPtr->parity_map, data); 1718 return 0; 1719 1720 case RAIDFRAME_PARITYMAP_SET_PARAMS: 1721 if (rf_paritymap_ineligible(raidPtr)) 1722 return EINVAL; 1723 if (raidPtr->parity_map == NULL) 1724 return ENOENT; /* ??? */ 1725 if (rf_paritymap_set_params(raidPtr->parity_map, data, 1) != 0) 1726 return EINVAL; 1727 return 0; 1728 1729 case RAIDFRAME_PARITYMAP_GET_DISABLE: 1730 if (rf_paritymap_ineligible(raidPtr)) 1731 return EINVAL; 1732 *(int *) data = rf_paritymap_get_disable(raidPtr); 1733 return 0; 1734 1735 case RAIDFRAME_PARITYMAP_SET_DISABLE: 1736 if (rf_paritymap_ineligible(raidPtr)) 1737 return EINVAL; 1738 rf_paritymap_set_disable(raidPtr, *(int *)data); 1739 /* XXX should errors be passed up? */ 1740 return 0; 1741 1742 case RAIDFRAME_RESCAN: 1743 return rf_rescan(); 1744 1745 case RAIDFRAME_RESET_ACCTOTALS: 1746 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals)); 1747 return 0; 1748 1749 case RAIDFRAME_GET_ACCTOTALS: 1750 totals = (RF_AccTotals_t *) data; 1751 *totals = raidPtr->acc_totals; 1752 return 0; 1753 1754 case RAIDFRAME_KEEP_ACCTOTALS: 1755 raidPtr->keep_acc_totals = *(int *)data; 1756 return 0; 1757 1758 case RAIDFRAME_GET_SIZE: 1759 *(int *) data = raidPtr->totalSectors; 1760 return 0; 1761 1762 case RAIDFRAME_FAIL_DISK: 1763 return rf_fail_disk(raidPtr, data); 1764 1765 /* copyback is no longer supported */ 1766 case RAIDFRAME_COPYBACK: 1767 return EINVAL; 1768 1769 /* return the percentage completion of reconstruction */ 1770 case RAIDFRAME_CHECK_RECON_STATUS: 1771 return rf_check_recon_status(raidPtr, data); 1772 1773 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1774 rf_check_recon_status_ext(raidPtr, data); 1775 return 0; 1776 1777 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1778 if (raidPtr->Layout.map->faultsTolerated == 0) { 1779 /* This makes no sense on a RAID 0, so tell the 1780 user it's done. 
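(For an active rewrite the percentage reported below is 100 * parity_rewrite_stripes_done / numStripe.)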
*/ 1781 *(int *) data = 100; 1782 return 0; 1783 } 1784 if (raidPtr->parity_rewrite_in_progress == 1) { 1785 *(int *) data = 100 * 1786 raidPtr->parity_rewrite_stripes_done / 1787 raidPtr->Layout.numStripe; 1788 } else { 1789 *(int *) data = 100; 1790 } 1791 return 0; 1792 1793 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1794 rf_check_parityrewrite_status_ext(raidPtr, data); 1795 return 0; 1796 1797 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1798 *(int *) data = 100; 1799 return 0; 1800 1801 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1802 rf_check_copyback_status_ext(raidPtr, data); 1803 return 0; 1804 1805 case RAIDFRAME_SET_LAST_UNIT: 1806 for (column = 0; column < raidPtr->numCol; column++) 1807 if (raidPtr->Disks[column].status != rf_ds_optimal) 1808 return EBUSY; 1809 1810 for (column = 0; column < raidPtr->numCol; column++) { 1811 clabel = raidget_component_label(raidPtr, column); 1812 clabel->last_unit = *(int *)data; 1813 raidflush_component_label(raidPtr, column); 1814 } 1815 rs->sc_cflags |= RAIDF_UNIT_CHANGED; 1816 return 0; 1817 1818 /* the sparetable daemon calls this to wait for the kernel to 1819 * need a spare table. this ioctl does not return until a 1820 * spare table is needed. XXX -- calling mpsleep here in the 1821 * ioctl code is almost certainly wrong and evil. -- XXX XXX 1822 * -- I should either compute the spare table in the kernel, 1823 * or have a different -- XXX XXX -- interface (a different 1824 * character device) for delivering the table -- XXX */ 1825 #if RF_DISABLED 1826 case RAIDFRAME_SPARET_WAIT: 1827 rf_lock_mutex2(rf_sparet_wait_mutex); 1828 while (!rf_sparet_wait_queue) 1829 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex); 1830 RF_SparetWait_t *waitreq = rf_sparet_wait_queue; 1831 rf_sparet_wait_queue = rf_sparet_wait_queue->next; 1832 rf_unlock_mutex2(rf_sparet_wait_mutex); 1833 1834 /* structure assignment */ 1835 *((RF_SparetWait_t *) data) = *waitreq; 1836 1837 RF_Free(waitreq, sizeof(*waitreq)); 1838 return 0; 1839 1840 /* wakes up a process waiting on SPARET_WAIT and puts an error 1841 * code in it that will cause the daemon to exit */ 1842 case RAIDFRAME_ABORT_SPARET_WAIT: 1843 waitreq = RF_Malloc(sizeof(*waitreq)); 1844 waitreq->fcol = -1; 1845 rf_lock_mutex2(rf_sparet_wait_mutex); 1846 waitreq->next = rf_sparet_wait_queue; 1847 rf_sparet_wait_queue = waitreq; 1848 rf_broadcast_cond2(rf_sparet_wait_cv); 1849 rf_unlock_mutex2(rf_sparet_wait_mutex); 1850 return 0; 1851 1852 /* used by the spare table daemon to deliver a spare table 1853 * into the kernel */ 1854 case RAIDFRAME_SEND_SPARET: 1855 1856 /* install the spare table */ 1857 retcode = rf_SetSpareTable(raidPtr, *(void **) data); 1858 1859 /* respond to the requestor. the return status of the spare 1860 * table installation is passed in the "fcol" field */ 1861 waitreq = RF_Malloc(sizeof(*waitreq)); 1862 waitreq->fcol = retcode; 1863 rf_lock_mutex2(rf_sparet_wait_mutex); 1864 waitreq->next = rf_sparet_resp_queue; 1865 rf_sparet_resp_queue = waitreq; 1866 rf_broadcast_cond2(rf_sparet_resp_cv); 1867 rf_unlock_mutex2(rf_sparet_wait_mutex); 1868 1869 return retcode; 1870 #endif 1871 default: 1872 /* 1873 * Don't bother trying to load compat modules 1874 * if it is not our ioctl.
This is more efficient 1875 * and makes rump tests not depend on compat code 1876 */ 1877 if (IOCGROUP(cmd) != 'r') 1878 break; 1879 #ifdef _LP64 1880 if ((l->l_proc->p_flag & PK_32) != 0) { 1881 module_autoload("compat_netbsd32_raid", 1882 MODULE_CLASS_EXEC); 1883 MODULE_HOOK_CALL(raidframe_netbsd32_ioctl_hook, 1884 (rs, cmd, data), enosys(), retcode); 1885 if (retcode != EPASSTHROUGH) 1886 return retcode; 1887 } 1888 #endif 1889 module_autoload("compat_raid_80", MODULE_CLASS_EXEC); 1890 MODULE_HOOK_CALL(raidframe_ioctl_80_hook, 1891 (rs, cmd, data), enosys(), retcode); 1892 if (retcode != EPASSTHROUGH) 1893 return retcode; 1894 1895 module_autoload("compat_raid_50", MODULE_CLASS_EXEC); 1896 MODULE_HOOK_CALL(raidframe_ioctl_50_hook, 1897 (rs, cmd, data), enosys(), retcode); 1898 if (retcode != EPASSTHROUGH) 1899 return retcode; 1900 break; /* fall through to the os-specific code below */ 1901 1902 } 1903 1904 if (!raidPtr->valid) 1905 return EINVAL; 1906 1907 /* 1908 * Add support for "regular" device ioctls here. 1909 */ 1910 1911 switch (cmd) { 1912 case DIOCGCACHE: 1913 retcode = rf_get_component_caches(raidPtr, (int *)data); 1914 break; 1915 1916 case DIOCCACHESYNC: 1917 retcode = rf_sync_component_caches(raidPtr, *(int *)data); 1918 break; 1919 1920 default: 1921 retcode = dk_ioctl(dksc, dev, cmd, data, flag, l); 1922 break; 1923 } 1924 1925 return retcode; 1926 1927 } 1928 1929 1930 /* raidinit -- complete the rest of the initialization for the 1931 RAIDframe device. */ 1932 1933 1934 static void 1935 raidinit(struct raid_softc *rs) 1936 { 1937 cfdata_t cf; 1938 unsigned int unit; 1939 struct dk_softc *dksc = &rs->sc_dksc; 1940 RF_Raid_t *raidPtr = &rs->sc_r; 1941 device_t dev; 1942 1943 unit = raidPtr->raidid; 1944 1945 /* XXX doesn't check bounds. */ 1946 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit); 1947 1948 /* attach the pseudo device */ 1949 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK); 1950 cf->cf_name = raid_cd.cd_name; 1951 cf->cf_atname = raid_cd.cd_name; 1952 cf->cf_unit = unit; 1953 cf->cf_fstate = FSTATE_STAR; 1954 1955 dev = config_attach_pseudo(cf); 1956 if (dev == NULL) { 1957 printf("raid%d: config_attach_pseudo failed\n", 1958 raidPtr->raidid); 1959 free(cf, M_RAIDFRAME); 1960 return; 1961 } 1962 1963 /* provide a backpointer to the real softc */ 1964 raidsoftc(dev) = rs; 1965 1966 /* disk_attach actually creates space for the CPU disklabel, among 1967 * other things, so it's critical to call this *BEFORE* we try putzing 1968 * with disklabels. */ 1969 dk_init(dksc, dev, DKTYPE_RAID); 1970 disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver); 1971 1972 /* XXX There may be a weird interaction here between this, and 1973 * protectedSectors, as used in RAIDframe. */ 1974 1975 rs->sc_size = raidPtr->totalSectors; 1976 1977 /* Attach dk and disk subsystems */ 1978 dk_attach(dksc); 1979 disk_attach(&dksc->sc_dkdev); 1980 rf_set_geometry(rs, raidPtr); 1981 1982 bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK); 1983 1984 /* mark unit as usable */ 1985 rs->sc_flags |= RAIDF_INITED; 1986 1987 dkwedge_discover(&dksc->sc_dkdev); 1988 } 1989 1990 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 1991 /* wake up the daemon & tell it to get us a spare table 1992 * XXX 1993 * the entries in the queues should be tagged with the raidPtr 1994 * so that in the extremely rare case that two recons happen at once, 1995 * we know for which device we're requesting a spare table 1996 * XXX 1997 * 1998 * XXX This code is not currently used.
1989 
1990 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1991 /* wake up the daemon & tell it to get us a spare table
1992  * XXX
1993  * the entries in the queues should be tagged with the raidPtr
1994  * so that in the extremely rare case that two recons happen at once,
1995  * we know for which device we're requesting a spare table
1996  * XXX
1997  *
1998  * XXX This code is not currently used. GO
1999  */
2000 int
2001 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
2002 {
2003 	int retcode;
2004 
2005 	rf_lock_mutex2(rf_sparet_wait_mutex);
2006 	req->next = rf_sparet_wait_queue;
2007 	rf_sparet_wait_queue = req;
2008 	rf_broadcast_cond2(rf_sparet_wait_cv);
2009 
2010 	/* rf_wait_cond2() drops the mutex while we sleep */
2011 	while (!rf_sparet_resp_queue) {
2012 		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
2013 	}
2014 	req = rf_sparet_resp_queue;
2015 	rf_sparet_resp_queue = req->next;
2016 	rf_unlock_mutex2(rf_sparet_wait_mutex);
2017 
2018 	retcode = req->fcol;
2019 	RF_Free(req, sizeof(*req));	/* this is not the same req as we
2020 					 * alloc'd */
2021 	return retcode;
2022 }
2023 #endif
2024 
2025 /* a wrapper around rf_DoAccess that extracts appropriate info from the
2026  * bp & passes it down.
2027  * Any calls originating in the kernel must use non-blocking I/O.
2028  * Does some extra sanity checking to return "appropriate" error values
2029  * for certain conditions (to make some standard utilities work).
2030  *
2031  * Formerly known as: rf_DoAccessKernel
2032  */
2033 void
2034 raidstart(RF_Raid_t *raidPtr)
2035 {
2036 	struct raid_softc *rs;
2037 	struct dk_softc *dksc;
2038 
2039 	rs = raidPtr->softc;
2040 	dksc = &rs->sc_dksc;
2041 	/* quick check to see if anything has died recently */
2042 	rf_lock_mutex2(raidPtr->mutex);
2043 	if (raidPtr->numNewFailures > 0) {
2044 		rf_unlock_mutex2(raidPtr->mutex);
2045 		rf_update_component_labels(raidPtr,
2046 		    RF_NORMAL_COMPONENT_UPDATE);
2047 		rf_lock_mutex2(raidPtr->mutex);
2048 		raidPtr->numNewFailures--;
2049 	}
2050 	rf_unlock_mutex2(raidPtr->mutex);
2051 
2052 	if ((rs->sc_flags & RAIDF_INITED) == 0) {
2053 		printf("raid%d: raidstart not ready\n", raidPtr->raidid);
2054 		return;
2055 	}
2056 
2057 	dk_start(dksc, NULL);
2058 }
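/*
 * Worked example for the size checks in raiddoaccess() below (assuming
 * 512-byte sectors, i.e. logBytesPerSector == 9 and sectorMask == 0x1ff):
 * a 64 KB buf at raid_addr 1000 gives num_blocks == 128 and pb == 0, so
 * sum == 1128, which must neither exceed totalSectors nor wrap around
 * below any addend.  An unaligned b_bcount of 65000 would instead give
 * pb == 1 and then fail the sectorMask alignment test with ENOSPC.
 */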
2059 
2060 static int
2061 raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
2062 {
2063 	RF_SectorCount_t num_blocks, pb, sum;
2064 	RF_RaidAddr_t raid_addr;
2065 	daddr_t blocknum;
2066 	int rc;
2067 
2068 	rf_lock_mutex2(raidPtr->mutex);
2069 	if (raidPtr->openings == 0) {
2070 		rf_unlock_mutex2(raidPtr->mutex);
2071 		return EAGAIN;
2072 	}
2073 	rf_unlock_mutex2(raidPtr->mutex);
2074 
2075 	blocknum = bp->b_rawblkno;
2076 
2077 	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
2078 	    (int) blocknum));
2079 
2080 	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
2081 	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
2082 
2083 	/* *THIS* is where we adjust what block we're going to...
2084 	 * but DO NOT TOUCH bp->b_blkno!!! */
2085 	raid_addr = blocknum;
2086 
2087 	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
2088 	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
2089 	sum = raid_addr + num_blocks + pb;
2090 	if (1 || rf_debugKernelAccess) {
2091 		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
2092 		    (int) raid_addr, (int) sum, (int) num_blocks,
2093 		    (int) pb, (int) bp->b_resid));
2094 	}
2095 	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
2096 	    || (sum < num_blocks) || (sum < pb)) {
2097 		rc = ENOSPC;
2098 		goto done;
2099 	}
2100 	/*
2101 	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
2102 	 */
2103 
2104 	if (bp->b_bcount & raidPtr->sectorMask) {
2105 		rc = ENOSPC;
2106 		goto done;
2107 	}
2108 	db1_printf(("Calling DoAccess..\n"));
2109 
2110 
2111 	rf_lock_mutex2(raidPtr->mutex);
2112 	raidPtr->openings--;
2113 	rf_unlock_mutex2(raidPtr->mutex);
2114 
2115 	/* don't ever condition on bp->b_flags & B_WRITE.
2116 	 * always condition on B_READ instead */
2117 
2118 	rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
2119 	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
2120 	    raid_addr, num_blocks,
2121 	    bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
2122 
2123 done:
2124 	return rc;
2125 }
2126 
2127 /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
2128 
2129 int
2130 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
2131 {
2132 	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
2133 	struct buf *bp;
2134 
2135 	req->queue = queue;
2136 	bp = req->bp;
2137 
2138 	switch (req->type) {
2139 	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
2140 		/* XXX need to do something extra here.. */
2141 		/* I'm leaving this in, as I've never actually seen it used,
2142 		 * and I'd like folks to report it... GO */
2143 		printf("%s: WAKEUP CALLED\n", __func__);
2144 		queue->numOutstanding++;
2145 
2146 		bp->b_flags = 0;
2147 		bp->b_private = req;
2148 
2149 		KernelWakeupFunc(bp);
2150 		break;
2151 
2152 	case RF_IO_TYPE_READ:
2153 	case RF_IO_TYPE_WRITE:
2154 #if RF_ACC_TRACE > 0
2155 		if (req->tracerec) {
2156 			RF_ETIMER_START(req->tracerec->timer);
2157 		}
2158 #endif
2159 		InitBP(bp, queue->rf_cinfo->ci_vp,
2160 		    op, queue->rf_cinfo->ci_dev,
2161 		    req->sectorOffset, req->numSector,
2162 		    req->buf, KernelWakeupFunc, (void *) req,
2163 		    queue->raidPtr->logBytesPerSector);
2164 
2165 		if (rf_debugKernelAccess) {
2166 			db1_printf(("dispatch: bp->b_blkno = %ld\n",
2167 			    (long) bp->b_blkno));
2168 		}
2169 		queue->numOutstanding++;
2170 		queue->last_deq_sector = req->sectorOffset;
2171 		/* acc wouldn't have been let in if there were any pending
2172 		 * reqs at any other priority */
2173 		queue->curPriority = req->priority;
2174 
2175 		db1_printf(("Going for %c to unit %d col %d\n",
2176 		    req->type, queue->raidPtr->raidid,
2177 		    queue->col));
2178 		db1_printf(("sector %d count %d (%d bytes) %d\n",
2179 		    (int) req->sectorOffset, (int) req->numSector,
2180 		    (int) (req->numSector <<
2181 		    queue->raidPtr->logBytesPerSector),
2182 		    (int) queue->raidPtr->logBytesPerSector));
2183 
2184 		/*
2185 		 * XXX: drop lock here since this can block at
2186 		 * least with backing SCSI devices.  Retake it
2187 		 * to minimize fuss with calling interfaces.
2188 		 */
2189 
2190 		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
2191 		bdev_strategy(bp);
2192 		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
2193 		break;
2194 
2195 	default:
2196 		panic("bad req->type in rf_DispatchKernelIO");
2197 	}
2198 	db1_printf(("Exiting from DispatchKernelIO\n"));
2199 
2200 	return 0;
2201 }
2202 /* this is the callback function associated with an I/O invoked from
2203    kernel code.
2204  */
2205 static void
2206 KernelWakeupFunc(struct buf *bp)
2207 {
2208 	RF_DiskQueueData_t *req = NULL;
2209 	RF_DiskQueue_t *queue;
2210 
2211 	db1_printf(("recovering the request queue:\n"));
2212 
2213 	req = bp->b_private;
2214 
2215 	queue = (RF_DiskQueue_t *) req->queue;
2216 
2217 	rf_lock_mutex2(queue->raidPtr->iodone_lock);
2218 
2219 #if RF_ACC_TRACE > 0
2220 	if (req->tracerec) {
2221 		RF_ETIMER_STOP(req->tracerec->timer);
2222 		RF_ETIMER_EVAL(req->tracerec->timer);
2223 		rf_lock_mutex2(rf_tracing_mutex);
2224 		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2225 		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2226 		req->tracerec->num_phys_ios++;
2227 		rf_unlock_mutex2(rf_tracing_mutex);
2228 	}
2229 #endif
2230 
2231 	/* XXX Ok, let's get aggressive... If b_error is set, let's go
2232 	 * ballistic, and mark the component as hosed... */
2233 
2234 	if (bp->b_error != 0) {
2235 		/* Mark the disk as dead */
2236 		/* but only mark it once...
*/ 2237 /* and only if it wouldn't leave this RAID set 2238 completely broken */ 2239 if (((queue->raidPtr->Disks[queue->col].status == 2240 rf_ds_optimal) || 2241 (queue->raidPtr->Disks[queue->col].status == 2242 rf_ds_used_spare)) && 2243 (queue->raidPtr->numFailures < 2244 queue->raidPtr->Layout.map->faultsTolerated)) { 2245 printf("raid%d: IO Error (%d). Marking %s as failed.\n", 2246 queue->raidPtr->raidid, 2247 bp->b_error, 2248 queue->raidPtr->Disks[queue->col].devname); 2249 queue->raidPtr->Disks[queue->col].status = 2250 rf_ds_failed; 2251 queue->raidPtr->status = rf_rs_degraded; 2252 queue->raidPtr->numFailures++; 2253 queue->raidPtr->numNewFailures++; 2254 } else { /* Disk is already dead... */ 2255 /* printf("Disk already marked as dead!\n"); */ 2256 } 2257 2258 } 2259 2260 /* Fill in the error value */ 2261 req->error = bp->b_error; 2262 2263 /* Drop this one on the "finished" queue... */ 2264 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries); 2265 2266 /* Let the raidio thread know there is work to be done. */ 2267 rf_signal_cond2(queue->raidPtr->iodone_cv); 2268 2269 rf_unlock_mutex2(queue->raidPtr->iodone_lock); 2270 } 2271 2272 2273 /* 2274 * initialize a buf structure for doing an I/O in the kernel. 2275 */ 2276 static void 2277 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev, 2278 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf, 2279 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector) 2280 { 2281 bp->b_flags = rw_flag | (bp->b_flags & rf_b_pass); 2282 bp->b_oflags = 0; 2283 bp->b_cflags = 0; 2284 bp->b_bcount = numSect << logBytesPerSector; 2285 bp->b_bufsize = bp->b_bcount; 2286 bp->b_error = 0; 2287 bp->b_dev = dev; 2288 bp->b_data = bf; 2289 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT; 2290 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */ 2291 if (bp->b_bcount == 0) { 2292 panic("bp->b_bcount is zero in InitBP!!"); 2293 } 2294 bp->b_iodone = cbFunc; 2295 bp->b_private = cbArg; 2296 } 2297 2298 /* 2299 * Wait interruptibly for an exclusive lock. 2300 * 2301 * XXX 2302 * Several drivers do this; it should be abstracted and made MP-safe. 2303 * (Hmm... where have we seen this warning before :-> GO ) 2304 */ 2305 static int 2306 raidlock(struct raid_softc *rs) 2307 { 2308 int error; 2309 2310 error = 0; 2311 mutex_enter(&rs->sc_mutex); 2312 while ((rs->sc_flags & RAIDF_LOCKED) != 0) { 2313 rs->sc_flags |= RAIDF_WANTED; 2314 error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex); 2315 if (error != 0) 2316 goto done; 2317 } 2318 rs->sc_flags |= RAIDF_LOCKED; 2319 done: 2320 mutex_exit(&rs->sc_mutex); 2321 return error; 2322 } 2323 /* 2324 * Unlock and wake up any waiters. 
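 *
 * Usage sketch (the same pattern raid_detach() uses later in this file):
 *
 *	if ((error = raidlock(rs)) != 0)
 *		return error;
 *	...operate on the unit exclusively...
 *	raidunlock(rs);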
2325 */ 2326 static void 2327 raidunlock(struct raid_softc *rs) 2328 { 2329 2330 mutex_enter(&rs->sc_mutex); 2331 rs->sc_flags &= ~RAIDF_LOCKED; 2332 if ((rs->sc_flags & RAIDF_WANTED) != 0) { 2333 rs->sc_flags &= ~RAIDF_WANTED; 2334 cv_broadcast(&rs->sc_cv); 2335 } 2336 mutex_exit(&rs->sc_mutex); 2337 } 2338 2339 2340 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ 2341 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ 2342 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE 2343 2344 static daddr_t 2345 rf_component_info_offset(void) 2346 { 2347 2348 return RF_COMPONENT_INFO_OFFSET; 2349 } 2350 2351 static daddr_t 2352 rf_component_info_size(unsigned secsize) 2353 { 2354 daddr_t info_size; 2355 2356 KASSERT(secsize); 2357 if (secsize > RF_COMPONENT_INFO_SIZE) 2358 info_size = secsize; 2359 else 2360 info_size = RF_COMPONENT_INFO_SIZE; 2361 2362 return info_size; 2363 } 2364 2365 static daddr_t 2366 rf_parity_map_offset(RF_Raid_t *raidPtr) 2367 { 2368 daddr_t map_offset; 2369 2370 KASSERT(raidPtr->bytesPerSector); 2371 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE) 2372 map_offset = raidPtr->bytesPerSector; 2373 else 2374 map_offset = RF_COMPONENT_INFO_SIZE; 2375 map_offset += rf_component_info_offset(); 2376 2377 return map_offset; 2378 } 2379 2380 static daddr_t 2381 rf_parity_map_size(RF_Raid_t *raidPtr) 2382 { 2383 daddr_t map_size; 2384 2385 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE) 2386 map_size = raidPtr->bytesPerSector; 2387 else 2388 map_size = RF_PARITY_MAP_SIZE; 2389 2390 return map_size; 2391 } 2392 2393 int 2394 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col) 2395 { 2396 RF_ComponentLabel_t *clabel; 2397 2398 clabel = raidget_component_label(raidPtr, col); 2399 clabel->clean = RF_RAID_CLEAN; 2400 raidflush_component_label(raidPtr, col); 2401 return(0); 2402 } 2403 2404 2405 int 2406 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col) 2407 { 2408 RF_ComponentLabel_t *clabel; 2409 2410 clabel = raidget_component_label(raidPtr, col); 2411 clabel->clean = RF_RAID_DIRTY; 2412 raidflush_component_label(raidPtr, col); 2413 return(0); 2414 } 2415 2416 int 2417 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2418 { 2419 KASSERT(raidPtr->bytesPerSector); 2420 2421 return raidread_component_label(raidPtr->bytesPerSector, 2422 raidPtr->Disks[col].dev, 2423 raidPtr->raid_cinfo[col].ci_vp, 2424 &raidPtr->raid_cinfo[col].ci_label); 2425 } 2426 2427 RF_ComponentLabel_t * 2428 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2429 { 2430 return &raidPtr->raid_cinfo[col].ci_label; 2431 } 2432 2433 int 2434 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2435 { 2436 RF_ComponentLabel_t *label; 2437 2438 label = &raidPtr->raid_cinfo[col].ci_label; 2439 label->mod_counter = raidPtr->mod_counter; 2440 #ifndef RF_NO_PARITY_MAP 2441 label->parity_map_modcount = label->mod_counter; 2442 #endif 2443 return raidwrite_component_label(raidPtr->bytesPerSector, 2444 raidPtr->Disks[col].dev, 2445 raidPtr->raid_cinfo[col].ci_vp, label); 2446 } 2447 2448 /* 2449 * Swap the label endianness. 2450 * 2451 * Everything in the component label is 4-byte-swapped except the version, 2452 * which is kept in the byte-swapped version at all times, and indicates 2453 * for the writer that a swap is necessary. 2454 * 2455 * For reads it is expected that out_label == clabel, but writes expect 2456 * separate labels so only the re-swapped label is written out to disk, 2457 * leaving the swapped-except-version internally. 2458 * 2459 * Only support swapping label version 2. 
2460 */ 2461 static void 2462 rf_swap_label(RF_ComponentLabel_t *clabel, RF_ComponentLabel_t *out_label) 2463 { 2464 int *in, *out, *in_last; 2465 2466 KASSERT(clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)); 2467 2468 /* Don't swap the label, but do copy it. */ 2469 out_label->version = clabel->version; 2470 2471 in = &clabel->serial_number; 2472 in_last = &clabel->future_use2[42]; 2473 out = &out_label->serial_number; 2474 2475 for (; in < in_last; in++, out++) 2476 *out = bswap32(*in); 2477 } 2478 2479 static int 2480 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp, 2481 RF_ComponentLabel_t *clabel) 2482 { 2483 int error; 2484 2485 error = raidread_component_area(dev, b_vp, clabel, 2486 sizeof(RF_ComponentLabel_t), 2487 rf_component_info_offset(), 2488 rf_component_info_size(secsize)); 2489 2490 if (error == 0 && 2491 clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) { 2492 rf_swap_label(clabel, clabel); 2493 } 2494 2495 return error; 2496 } 2497 2498 /* ARGSUSED */ 2499 static int 2500 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data, 2501 size_t msize, daddr_t offset, daddr_t dsize) 2502 { 2503 struct buf *bp; 2504 int error; 2505 2506 /* XXX should probably ensure that we don't try to do this if 2507 someone has changed rf_protected_sectors. */ 2508 2509 if (b_vp == NULL) { 2510 /* For whatever reason, this component is not valid. 2511 Don't try to read a component label from it. */ 2512 return(EINVAL); 2513 } 2514 2515 /* get a block of the appropriate size... */ 2516 bp = geteblk((int)dsize); 2517 bp->b_dev = dev; 2518 2519 /* get our ducks in a row for the read */ 2520 bp->b_blkno = offset / DEV_BSIZE; 2521 bp->b_bcount = dsize; 2522 bp->b_flags |= B_READ; 2523 bp->b_resid = dsize; 2524 2525 bdev_strategy(bp); 2526 error = biowait(bp); 2527 2528 if (!error) { 2529 memcpy(data, bp->b_data, msize); 2530 } 2531 2532 brelse(bp, 0); 2533 return(error); 2534 } 2535 2536 static int 2537 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp, 2538 RF_ComponentLabel_t *clabel) 2539 { 2540 RF_ComponentLabel_t *clabel_write = clabel; 2541 RF_ComponentLabel_t lclabel; 2542 int error; 2543 2544 if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) { 2545 clabel_write = &lclabel; 2546 rf_swap_label(clabel, clabel_write); 2547 } 2548 error = raidwrite_component_area(dev, b_vp, clabel_write, 2549 sizeof(RF_ComponentLabel_t), 2550 rf_component_info_offset(), 2551 rf_component_info_size(secsize)); 2552 2553 return error; 2554 } 2555 2556 /* ARGSUSED */ 2557 static int 2558 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data, 2559 size_t msize, daddr_t offset, daddr_t dsize) 2560 { 2561 struct buf *bp; 2562 int error; 2563 2564 /* get a block of the appropriate size... */ 2565 bp = geteblk((int)dsize); 2566 bp->b_dev = dev; 2567 2568 /* get our ducks in a row for the write */ 2569 bp->b_blkno = offset / DEV_BSIZE; 2570 bp->b_bcount = dsize; 2571 bp->b_flags |= B_WRITE; 2572 bp->b_resid = dsize; 2573 2574 memset(bp->b_data, 0, dsize); 2575 memcpy(bp->b_data, data, msize); 2576 2577 bdev_strategy(bp); 2578 error = biowait(bp); 2579 brelse(bp, 0); 2580 if (error) { 2581 #if 1 2582 printf("Failed to write RAID component info!\n"); 2583 #endif 2584 } 2585 2586 return(error); 2587 } 2588 2589 void 2590 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map) 2591 { 2592 int c; 2593 2594 for (c = 0; c < raidPtr->numCol; c++) { 2595 /* Skip dead disks. 
*/ 2596 if (RF_DEAD_DISK(raidPtr->Disks[c].status)) 2597 continue; 2598 /* XXXjld: what if an error occurs here? */ 2599 raidwrite_component_area(raidPtr->Disks[c].dev, 2600 raidPtr->raid_cinfo[c].ci_vp, map, 2601 RF_PARITYMAP_NBYTE, 2602 rf_parity_map_offset(raidPtr), 2603 rf_parity_map_size(raidPtr)); 2604 } 2605 } 2606 2607 void 2608 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map) 2609 { 2610 struct rf_paritymap_ondisk tmp; 2611 int c,first; 2612 2613 first=1; 2614 for (c = 0; c < raidPtr->numCol; c++) { 2615 /* Skip dead disks. */ 2616 if (RF_DEAD_DISK(raidPtr->Disks[c].status)) 2617 continue; 2618 raidread_component_area(raidPtr->Disks[c].dev, 2619 raidPtr->raid_cinfo[c].ci_vp, &tmp, 2620 RF_PARITYMAP_NBYTE, 2621 rf_parity_map_offset(raidPtr), 2622 rf_parity_map_size(raidPtr)); 2623 if (first) { 2624 memcpy(map, &tmp, sizeof(*map)); 2625 first = 0; 2626 } else { 2627 rf_paritymap_merge(map, &tmp); 2628 } 2629 } 2630 } 2631 2632 void 2633 rf_markalldirty(RF_Raid_t *raidPtr) 2634 { 2635 RF_ComponentLabel_t *clabel; 2636 int sparecol; 2637 int c; 2638 int j; 2639 int scol = -1; 2640 2641 raidPtr->mod_counter++; 2642 for (c = 0; c < raidPtr->numCol; c++) { 2643 /* we don't want to touch (at all) a disk that has 2644 failed */ 2645 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) { 2646 clabel = raidget_component_label(raidPtr, c); 2647 if (clabel->status == rf_ds_spared) { 2648 /* XXX do something special... 2649 but whatever you do, don't 2650 try to access it!! */ 2651 } else { 2652 raidmarkdirty(raidPtr, c); 2653 } 2654 } 2655 } 2656 2657 for (c = 0; c < raidPtr->numSpare ; c++) { 2658 sparecol = raidPtr->numCol + c; 2659 2660 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 2661 /* 2662 2663 we claim this disk is "optimal" if it's 2664 rf_ds_used_spare, as that means it should be 2665 directly substitutable for the disk it replaced. 2666 We note that too... 2667 2668 */ 2669 2670 for(j=0;j<raidPtr->numCol;j++) { 2671 if (raidPtr->Disks[j].spareCol == sparecol) { 2672 scol = j; 2673 break; 2674 } 2675 } 2676 2677 clabel = raidget_component_label(raidPtr, sparecol); 2678 /* make sure status is noted */ 2679 2680 raid_init_component_label(raidPtr, clabel); 2681 2682 clabel->row = 0; 2683 clabel->column = scol; 2684 /* Note: we *don't* change status from rf_ds_used_spare 2685 to rf_ds_optimal */ 2686 /* clabel.status = rf_ds_optimal; */ 2687 2688 raidmarkdirty(raidPtr, sparecol); 2689 } 2690 } 2691 } 2692 2693 2694 void 2695 rf_update_component_labels(RF_Raid_t *raidPtr, int final) 2696 { 2697 RF_ComponentLabel_t *clabel; 2698 int sparecol; 2699 int c; 2700 int j; 2701 int scol; 2702 struct raid_softc *rs = raidPtr->softc; 2703 2704 scol = -1; 2705 2706 /* XXX should do extra checks to make sure things really are clean, 2707 rather than blindly setting the clean bit... */ 2708 2709 raidPtr->mod_counter++; 2710 2711 for (c = 0; c < raidPtr->numCol; c++) { 2712 if (raidPtr->Disks[c].status == rf_ds_optimal) { 2713 clabel = raidget_component_label(raidPtr, c); 2714 /* make sure status is noted */ 2715 clabel->status = rf_ds_optimal; 2716 2717 /* note what unit we are configured as */ 2718 if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0) 2719 clabel->last_unit = raidPtr->raidid; 2720 2721 raidflush_component_label(raidPtr, c); 2722 if (final == RF_FINAL_COMPONENT_UPDATE) { 2723 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2724 raidmarkclean(raidPtr, c); 2725 } 2726 } 2727 } 2728 /* else we don't touch it.. 
*/ 2729 } 2730 2731 for (c = 0; c < raidPtr->numSpare ; c++) { 2732 sparecol = raidPtr->numCol + c; 2733 2734 /* Need to ensure that the reconstruct actually completed! */ 2735 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 2736 /* 2737 2738 we claim this disk is "optimal" if it's 2739 rf_ds_used_spare, as that means it should be 2740 directly substitutable for the disk it replaced. 2741 We note that too... 2742 2743 */ 2744 2745 for(j=0;j<raidPtr->numCol;j++) { 2746 if (raidPtr->Disks[j].spareCol == sparecol) { 2747 scol = j; 2748 break; 2749 } 2750 } 2751 2752 /* XXX shouldn't *really* need this... */ 2753 clabel = raidget_component_label(raidPtr, sparecol); 2754 /* make sure status is noted */ 2755 2756 raid_init_component_label(raidPtr, clabel); 2757 2758 clabel->column = scol; 2759 clabel->status = rf_ds_optimal; 2760 if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0) 2761 clabel->last_unit = raidPtr->raidid; 2762 2763 raidflush_component_label(raidPtr, sparecol); 2764 if (final == RF_FINAL_COMPONENT_UPDATE) { 2765 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2766 raidmarkclean(raidPtr, sparecol); 2767 } 2768 } 2769 } 2770 } 2771 } 2772 2773 void 2774 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured) 2775 { 2776 2777 if (vp != NULL) { 2778 if (auto_configured == 1) { 2779 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2780 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 2781 vput(vp); 2782 2783 } else { 2784 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred); 2785 } 2786 } 2787 } 2788 2789 2790 void 2791 rf_UnconfigureVnodes(RF_Raid_t *raidPtr) 2792 { 2793 int r,c; 2794 struct vnode *vp; 2795 int acd; 2796 2797 2798 /* We take this opportunity to close the vnodes like we should.. */ 2799 2800 for (c = 0; c < raidPtr->numCol; c++) { 2801 vp = raidPtr->raid_cinfo[c].ci_vp; 2802 acd = raidPtr->Disks[c].auto_configured; 2803 rf_close_component(raidPtr, vp, acd); 2804 raidPtr->raid_cinfo[c].ci_vp = NULL; 2805 raidPtr->Disks[c].auto_configured = 0; 2806 } 2807 2808 for (r = 0; r < raidPtr->numSpare; r++) { 2809 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp; 2810 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured; 2811 rf_close_component(raidPtr, vp, acd); 2812 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL; 2813 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0; 2814 } 2815 } 2816 2817 2818 static void 2819 rf_ReconThread(struct rf_recon_req_internal *req) 2820 { 2821 int s; 2822 RF_Raid_t *raidPtr; 2823 2824 s = splbio(); 2825 raidPtr = (RF_Raid_t *) req->raidPtr; 2826 raidPtr->recon_in_progress = 1; 2827 2828 if (req->flags & RF_FDFLAGS_RECON_FORCE) { 2829 raidPtr->forceRecon = 1; 2830 } 2831 2832 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col, 2833 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0)); 2834 2835 if (req->flags & RF_FDFLAGS_RECON_FORCE) { 2836 raidPtr->forceRecon = 0; 2837 } 2838 2839 RF_Free(req, sizeof(*req)); 2840 2841 raidPtr->recon_in_progress = 0; 2842 splx(s); 2843 2844 /* That's all... */ 2845 kthread_exit(0); /* does not return */ 2846 } 2847 2848 static void 2849 rf_RewriteParityThread(RF_Raid_t *raidPtr) 2850 { 2851 int retcode; 2852 int s; 2853 2854 raidPtr->parity_rewrite_stripes_done = 0; 2855 raidPtr->parity_rewrite_in_progress = 1; 2856 s = splbio(); 2857 retcode = rf_RewriteParity(raidPtr); 2858 splx(s); 2859 if (retcode) { 2860 printf("raid%d: Error re-writing parity (%d)!\n", 2861 raidPtr->raidid, retcode); 2862 } else { 2863 /* set the clean bit! 
If we shutdown correctly, 2864 the clean bit on each component label will get 2865 set */ 2866 raidPtr->parity_good = RF_RAID_CLEAN; 2867 } 2868 raidPtr->parity_rewrite_in_progress = 0; 2869 2870 /* Anyone waiting for us to stop? If so, inform them... */ 2871 if (raidPtr->waitShutdown) { 2872 rf_lock_mutex2(raidPtr->rad_lock); 2873 cv_broadcast(&raidPtr->parity_rewrite_cv); 2874 rf_unlock_mutex2(raidPtr->rad_lock); 2875 } 2876 2877 /* That's all... */ 2878 kthread_exit(0); /* does not return */ 2879 } 2880 2881 static void 2882 rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req) 2883 { 2884 int s; 2885 RF_Raid_t *raidPtr; 2886 2887 s = splbio(); 2888 raidPtr = req->raidPtr; 2889 raidPtr->recon_in_progress = 1; 2890 2891 if (req->flags & RF_FDFLAGS_RECON_FORCE) { 2892 raidPtr->forceRecon = 1; 2893 } 2894 2895 rf_ReconstructInPlace(raidPtr, req->col); 2896 2897 if (req->flags & RF_FDFLAGS_RECON_FORCE) { 2898 raidPtr->forceRecon = 0; 2899 } 2900 2901 RF_Free(req, sizeof(*req)); 2902 raidPtr->recon_in_progress = 0; 2903 splx(s); 2904 2905 /* That's all... */ 2906 kthread_exit(0); /* does not return */ 2907 } 2908 2909 static RF_AutoConfig_t * 2910 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp, 2911 const char *cname, RF_SectorCount_t size, uint64_t numsecs, 2912 unsigned secsize) 2913 { 2914 int good_one = 0; 2915 RF_ComponentLabel_t *clabel; 2916 RF_AutoConfig_t *ac; 2917 2918 clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_WAITOK); 2919 2920 if (!raidread_component_label(secsize, dev, vp, clabel)) { 2921 /* Got the label. Does it look reasonable? */ 2922 if (rf_reasonable_label(clabel, numsecs) && 2923 (rf_component_label_partitionsize(clabel) <= size)) { 2924 #ifdef DEBUG 2925 printf("Component on: %s: %llu\n", 2926 cname, (unsigned long long)size); 2927 rf_print_component_label(clabel); 2928 #endif 2929 /* if it's reasonable, add it, else ignore it. */ 2930 ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME, 2931 M_WAITOK); 2932 strlcpy(ac->devname, cname, sizeof(ac->devname)); 2933 ac->dev = dev; 2934 ac->vp = vp; 2935 ac->clabel = clabel; 2936 ac->next = ac_list; 2937 ac_list = ac; 2938 good_one = 1; 2939 } 2940 } 2941 if (!good_one) { 2942 /* cleanup */ 2943 free(clabel, M_RAIDFRAME); 2944 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2945 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 2946 vput(vp); 2947 } 2948 return ac_list; 2949 } 2950 2951 static RF_AutoConfig_t * 2952 rf_find_raid_components(void) 2953 { 2954 struct vnode *vp; 2955 struct disklabel label; 2956 device_t dv; 2957 deviter_t di; 2958 dev_t dev; 2959 int bmajor, bminor, wedge, rf_part_found; 2960 int error; 2961 int i; 2962 RF_AutoConfig_t *ac_list; 2963 uint64_t numsecs; 2964 unsigned secsize; 2965 int dowedges; 2966 2967 /* initialize the AutoConfig list */ 2968 ac_list = NULL; 2969 2970 /* 2971 * we begin by trolling through *all* the devices on the system *twice* 2972 * first we scan for wedges, second for other devices. This avoids 2973 * using a raw partition instead of a wedge that covers the whole disk 2974 */ 2975 2976 for (dowedges=1; dowedges>=0; --dowedges) { 2977 for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL; 2978 dv = deviter_next(&di)) { 2979 2980 /* we are only interested in disks */ 2981 if (device_class(dv) != DV_DISK) 2982 continue; 2983 2984 /* we don't care about floppies */ 2985 if (device_is_a(dv, "fd")) { 2986 continue; 2987 } 2988 2989 /* we don't care about CDs. 
*/ 2990 if (device_is_a(dv, "cd")) { 2991 continue; 2992 } 2993 2994 /* we don't care about md. */ 2995 if (device_is_a(dv, "md")) { 2996 continue; 2997 } 2998 2999 /* hdfd is the Atari/Hades floppy driver */ 3000 if (device_is_a(dv, "hdfd")) { 3001 continue; 3002 } 3003 3004 /* fdisa is the Atari/Milan floppy driver */ 3005 if (device_is_a(dv, "fdisa")) { 3006 continue; 3007 } 3008 3009 /* we don't care about spiflash */ 3010 if (device_is_a(dv, "spiflash")) { 3011 continue; 3012 } 3013 3014 /* are we in the wedges pass ? */ 3015 wedge = device_is_a(dv, "dk"); 3016 if (wedge != dowedges) { 3017 continue; 3018 } 3019 3020 /* need to find the device_name_to_block_device_major stuff */ 3021 bmajor = devsw_name2blk(device_xname(dv), NULL, 0); 3022 3023 rf_part_found = 0; /*No raid partition as yet*/ 3024 3025 /* get a vnode for the raw partition of this disk */ 3026 bminor = minor(device_unit(dv)); 3027 dev = wedge ? makedev(bmajor, bminor) : 3028 MAKEDISKDEV(bmajor, bminor, RAW_PART); 3029 if (bdevvp(dev, &vp)) 3030 panic("RAID can't alloc vnode"); 3031 3032 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3033 error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED); 3034 3035 if (error) { 3036 /* "Who cares." Continue looking 3037 for something that exists*/ 3038 vput(vp); 3039 continue; 3040 } 3041 3042 error = getdisksize(vp, &numsecs, &secsize); 3043 if (error) { 3044 /* 3045 * Pseudo devices like vnd and cgd can be 3046 * opened but may still need some configuration. 3047 * Ignore these quietly. 3048 */ 3049 if (error != ENXIO) 3050 printf("RAIDframe: can't get disk size" 3051 " for dev %s (%d)\n", 3052 device_xname(dv), error); 3053 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3054 vput(vp); 3055 continue; 3056 } 3057 if (wedge) { 3058 struct dkwedge_info dkw; 3059 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, 3060 NOCRED); 3061 if (error) { 3062 printf("RAIDframe: can't get wedge info for " 3063 "dev %s (%d)\n", device_xname(dv), error); 3064 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3065 vput(vp); 3066 continue; 3067 } 3068 3069 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) { 3070 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3071 vput(vp); 3072 continue; 3073 } 3074 3075 VOP_UNLOCK(vp); 3076 ac_list = rf_get_component(ac_list, dev, vp, 3077 device_xname(dv), dkw.dkw_size, numsecs, secsize); 3078 rf_part_found = 1; /*There is a raid component on this disk*/ 3079 continue; 3080 } 3081 3082 /* Ok, the disk exists. Go get the disklabel. */ 3083 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED); 3084 if (error) { 3085 /* 3086 * XXX can't happen - open() would 3087 * have errored out (or faked up one) 3088 */ 3089 if (error != ENOTTY) 3090 printf("RAIDframe: can't get label for dev " 3091 "%s (%d)\n", device_xname(dv), error); 3092 } 3093 3094 /* don't need this any more. We'll allocate it again 3095 a little later if we really do... */ 3096 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3097 vput(vp); 3098 3099 if (error) 3100 continue; 3101 3102 rf_part_found = 0; /*No raid partitions yet*/ 3103 for (i = 0; i < label.d_npartitions; i++) { 3104 char cname[sizeof(ac_list->devname)]; 3105 3106 /* We only support partitions marked as RAID */ 3107 if (label.d_partitions[i].p_fstype != FS_RAID) 3108 continue; 3109 3110 dev = MAKEDISKDEV(bmajor, device_unit(dv), i); 3111 if (bdevvp(dev, &vp)) 3112 panic("RAID can't alloc vnode"); 3113 3114 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3115 error = VOP_OPEN(vp, FREAD, NOCRED); 3116 if (error) { 3117 /* Not quite a 'whatever'. 
In 3118 * this situation we know 3119 * there is a FS_RAID 3120 * partition, but we can't 3121 * open it. The most likely 3122 * reason is that the 3123 * partition is already in 3124 * use by another RAID set. 3125 * So note that we've already 3126 * found a partition on this 3127 * disk so we don't attempt 3128 * to use the raw disk later. */ 3129 rf_part_found = 1; 3130 vput(vp); 3131 continue; 3132 } 3133 VOP_UNLOCK(vp); 3134 snprintf(cname, sizeof(cname), "%s%c", 3135 device_xname(dv), 'a' + i); 3136 ac_list = rf_get_component(ac_list, dev, vp, cname, 3137 label.d_partitions[i].p_size, numsecs, secsize); 3138 rf_part_found = 1; /*There is at least one raid partition on this disk*/ 3139 } 3140 3141 /* 3142 *If there is no raid component on this disk, either in a 3143 *disklabel or inside a wedge, check the raw partition as well, 3144 *as it is possible to configure raid components on raw disk 3145 *devices. 3146 */ 3147 3148 if (!rf_part_found) { 3149 char cname[sizeof(ac_list->devname)]; 3150 3151 dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART); 3152 if (bdevvp(dev, &vp)) 3153 panic("RAID can't alloc vnode"); 3154 3155 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3156 3157 error = VOP_OPEN(vp, FREAD, NOCRED); 3158 if (error) { 3159 /* Whatever... */ 3160 vput(vp); 3161 continue; 3162 } 3163 VOP_UNLOCK(vp); 3164 snprintf(cname, sizeof(cname), "%s%c", 3165 device_xname(dv), 'a' + RAW_PART); 3166 ac_list = rf_get_component(ac_list, dev, vp, cname, 3167 label.d_partitions[RAW_PART].p_size, numsecs, secsize); 3168 } 3169 } 3170 deviter_release(&di); 3171 } 3172 return ac_list; 3173 } 3174 3175 int 3176 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs) 3177 { 3178 3179 if ((clabel->version==RF_COMPONENT_LABEL_VERSION_1 || 3180 clabel->version==RF_COMPONENT_LABEL_VERSION || 3181 clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) && 3182 (clabel->clean == RF_RAID_CLEAN || 3183 clabel->clean == RF_RAID_DIRTY) && 3184 clabel->row >=0 && 3185 clabel->column >= 0 && 3186 clabel->num_rows > 0 && 3187 clabel->num_columns > 0 && 3188 clabel->row < clabel->num_rows && 3189 clabel->column < clabel->num_columns && 3190 clabel->blockSize > 0 && 3191 /* 3192 * numBlocksHi may contain garbage, but it is ok since 3193 * the type is unsigned. If it is really garbage, 3194 * rf_fix_old_label_size() will fix it. 3195 */ 3196 rf_component_label_numblocks(clabel) > 0) { 3197 /* 3198 * label looks reasonable enough... 3199 * let's make sure it has no old garbage. 3200 */ 3201 if (numsecs) 3202 rf_fix_old_label_size(clabel, numsecs); 3203 return(1); 3204 } 3205 return(0); 3206 } 3207 3208 3209 /* 3210 * For reasons yet unknown, some old component labels have garbage in 3211 * the newer numBlocksHi region, and this causes lossage. Since those 3212 * disks will also have numsecs set to less than 32 bits of sectors, 3213 * we can determine when this corruption has occurred, and fix it. 3214 * 3215 * The exact same problem, with the same unknown reason, happens to 3216 * the partitionSizeHi member as well. 
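 *
 * A sketch of the repair arithmetic: a component of 2^30 512-byte
 * sectors (512 GB) has numsecs < 2^32, so numBlocksHi must be zero; any
 * garbage there would inflate the claimed size by multiples of 2^32
 * sectors, which such a disk cannot have, so it is safe to clear it.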
3217 */ 3218 static void 3219 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs) 3220 { 3221 3222 if (numsecs < ((uint64_t)1 << 32)) { 3223 if (clabel->numBlocksHi) { 3224 printf("WARNING: total sectors < 32 bits, yet " 3225 "numBlocksHi set\n" 3226 "WARNING: resetting numBlocksHi to zero.\n"); 3227 clabel->numBlocksHi = 0; 3228 } 3229 3230 if (clabel->partitionSizeHi) { 3231 printf("WARNING: total sectors < 32 bits, yet " 3232 "partitionSizeHi set\n" 3233 "WARNING: resetting partitionSizeHi to zero.\n"); 3234 clabel->partitionSizeHi = 0; 3235 } 3236 } 3237 } 3238 3239 3240 #ifdef DEBUG 3241 void 3242 rf_print_component_label(RF_ComponentLabel_t *clabel) 3243 { 3244 uint64_t numBlocks; 3245 static const char *rp[] = { 3246 "No", "Force", "Soft", "*invalid*" 3247 }; 3248 3249 3250 numBlocks = rf_component_label_numblocks(clabel); 3251 3252 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", 3253 clabel->row, clabel->column, 3254 clabel->num_rows, clabel->num_columns); 3255 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 3256 clabel->version, clabel->serial_number, 3257 clabel->mod_counter); 3258 printf(" Clean: %s Status: %d\n", 3259 clabel->clean ? "Yes" : "No", clabel->status); 3260 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n", 3261 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); 3262 printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n", 3263 (char) clabel->parityConfig, clabel->blockSize, numBlocks); 3264 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No"); 3265 printf(" Root partition: %s\n", rp[clabel->root_partition & 3]); 3266 printf(" Last configured as: raid%d\n", clabel->last_unit); 3267 #if 0 3268 printf(" Config order: %d\n", clabel->config_order); 3269 #endif 3270 3271 } 3272 #endif 3273 3274 static RF_ConfigSet_t * 3275 rf_create_auto_sets(RF_AutoConfig_t *ac_list) 3276 { 3277 RF_AutoConfig_t *ac; 3278 RF_ConfigSet_t *config_sets; 3279 RF_ConfigSet_t *cset; 3280 RF_AutoConfig_t *ac_next; 3281 3282 3283 config_sets = NULL; 3284 3285 /* Go through the AutoConfig list, and figure out which components 3286 belong to what sets. */ 3287 ac = ac_list; 3288 while(ac!=NULL) { 3289 /* we're going to putz with ac->next, so save it here 3290 for use at the end of the loop */ 3291 ac_next = ac->next; 3292 3293 if (config_sets == NULL) { 3294 /* will need at least this one... */ 3295 config_sets = malloc(sizeof(RF_ConfigSet_t), 3296 M_RAIDFRAME, M_WAITOK); 3297 /* this one is easy :) */ 3298 config_sets->ac = ac; 3299 config_sets->next = NULL; 3300 config_sets->rootable = 0; 3301 ac->next = NULL; 3302 } else { 3303 /* which set does this component fit into? */ 3304 cset = config_sets; 3305 while(cset!=NULL) { 3306 if (rf_does_it_fit(cset, ac)) { 3307 /* looks like it matches... */ 3308 ac->next = cset->ac; 3309 cset->ac = ac; 3310 break; 3311 } 3312 cset = cset->next; 3313 } 3314 if (cset==NULL) { 3315 /* didn't find a match above... new set..*/ 3316 cset = malloc(sizeof(RF_ConfigSet_t), 3317 M_RAIDFRAME, M_WAITOK); 3318 cset->ac = ac; 3319 ac->next = NULL; 3320 cset->next = config_sets; 3321 cset->rootable = 0; 3322 config_sets = cset; 3323 } 3324 } 3325 ac = ac_next; 3326 } 3327 3328 3329 return(config_sets); 3330 } 3331 3332 static int 3333 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac) 3334 { 3335 RF_ComponentLabel_t *clabel1, *clabel2; 3336 3337 /* If this one matches the *first* one in the set, that's good 3338 enough, since the other members of the set would have been 3339 through here too... 
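   (each member was compared against this same head label when it was
   added to the set, so matching the head implies matching them all)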
 */
3340 /* note that we are not checking partitionSize here..
3341 
3342    Note that we are also not checking the mod_counters here.
3343    If everything else matches except the mod_counter, that's
3344    good enough for this test.  We will deal with the mod_counters
3345    a little later in the autoconfiguration process.
3346 
3347     (clabel1->mod_counter == clabel2->mod_counter) &&
3348 
3349    The reason we don't check for this is that failed disks
3350    will have lower modification counts.  If those disks are
3351    not added to the set they used to belong to, then they will
3352    form their own set, which may result in 2 different sets,
3353    for example, competing to be configured at raid0, and
3354    perhaps competing to be the root filesystem set.  If the
3355    wrong ones get configured, or both attempt to become /,
3356    weird behaviour and/or serious lossage will occur.  Thus we
3357    need to bring them into the fold here, and kick them out at
3358    a later point.
3359 
3360 */
3361 
3362 	clabel1 = cset->ac->clabel;
3363 	clabel2 = ac->clabel;
3364 	if ((clabel1->version == clabel2->version) &&
3365 	    (clabel1->serial_number == clabel2->serial_number) &&
3366 	    (clabel1->num_rows == clabel2->num_rows) &&
3367 	    (clabel1->num_columns == clabel2->num_columns) &&
3368 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
3369 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3370 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3371 	    (clabel1->parityConfig == clabel2->parityConfig) &&
3372 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3373 	    (clabel1->blockSize == clabel2->blockSize) &&
3374 	    rf_component_label_numblocks(clabel1) ==
3375 	    rf_component_label_numblocks(clabel2) &&
3376 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
3377 	    (clabel1->root_partition == clabel2->root_partition) &&
3378 	    (clabel1->last_unit == clabel2->last_unit) &&
3379 	    (clabel1->config_order == clabel2->config_order)) {
3380 		/* if it gets here, it almost *has* to be a match */
3381 	} else {
3382 		/* it's not consistent with somebody in the set..
3383 		   punt */
3384 		return(0);
3385 	}
3386 	/* all was fine.. it must fit... */
3387 	return(1);
3388 }
3389 
3390 static int
3391 rf_have_enough_components(RF_ConfigSet_t *cset)
3392 {
3393 	RF_AutoConfig_t *ac;
3394 	RF_AutoConfig_t *auto_config;
3395 	RF_ComponentLabel_t *clabel;
3396 	int c;
3397 	int num_cols;
3398 	int num_missing;
3399 	int mod_counter;
3400 	int mod_counter_found;
3401 	int even_pair_failed;
3402 	char parity_type;
3403 
3404 
3405 	/* check to see that we have enough 'live' components
3406 	   of this set.  If so, we can configure it if necessary */
3407 
3408 	num_cols = cset->ac->clabel->num_columns;
3409 	parity_type = cset->ac->clabel->parityConfig;
3410 
3411 	/* XXX Check for duplicate components!?!?!? */
3412 
3413 	/* Determine what the mod_counter is supposed to be for this set. */
3414 
3415 	mod_counter_found = 0;
3416 	mod_counter = 0;
3417 	ac = cset->ac;
3418 	while(ac!=NULL) {
3419 		if (mod_counter_found==0) {
3420 			mod_counter = ac->clabel->mod_counter;
3421 			mod_counter_found = 1;
3422 		} else {
3423 			if (ac->clabel->mod_counter > mod_counter) {
3424 				mod_counter = ac->clabel->mod_counter;
3425 			}
3426 		}
3427 		ac = ac->next;
3428 	}
3429 
3430 	num_missing = 0;
3431 	auto_config = cset->ac;
3432 
3433 	even_pair_failed = 0;
3434 	for(c=0; c<num_cols; c++) {
3435 		ac = auto_config;
3436 		while(ac!=NULL) {
3437 			if ((ac->clabel->column == c) &&
3438 			    (ac->clabel->mod_counter == mod_counter)) {
3439 				/* it's this one...
*/ 3440 #ifdef DEBUG 3441 printf("Found: %s at %d\n", 3442 ac->devname,c); 3443 #endif 3444 break; 3445 } 3446 ac=ac->next; 3447 } 3448 if (ac==NULL) { 3449 /* Didn't find one here! */ 3450 /* special case for RAID 1, especially 3451 where there are more than 2 3452 components (where RAIDframe treats 3453 things a little differently :( ) */ 3454 if (parity_type == '1') { 3455 if (c%2 == 0) { /* even component */ 3456 even_pair_failed = 1; 3457 } else { /* odd component. If 3458 we're failed, and 3459 so is the even 3460 component, it's 3461 "Good Night, Charlie" */ 3462 if (even_pair_failed == 1) { 3463 return(0); 3464 } 3465 } 3466 } else { 3467 /* normal accounting */ 3468 num_missing++; 3469 } 3470 } 3471 if ((parity_type == '1') && (c%2 == 1)) { 3472 /* Just did an even component, and we didn't 3473 bail.. reset the even_pair_failed flag, 3474 and go on to the next component.... */ 3475 even_pair_failed = 0; 3476 } 3477 } 3478 3479 clabel = cset->ac->clabel; 3480 3481 if (((clabel->parityConfig == '0') && (num_missing > 0)) || 3482 ((clabel->parityConfig == '4') && (num_missing > 1)) || 3483 ((clabel->parityConfig == '5') && (num_missing > 1))) { 3484 /* XXX this needs to be made *much* more general */ 3485 /* Too many failures */ 3486 return(0); 3487 } 3488 /* otherwise, all is well, and we've got enough to take a kick 3489 at autoconfiguring this set */ 3490 return(1); 3491 } 3492 3493 static void 3494 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config, 3495 RF_Raid_t *raidPtr) 3496 { 3497 RF_ComponentLabel_t *clabel; 3498 int i; 3499 3500 clabel = ac->clabel; 3501 3502 /* 1. Fill in the common stuff */ 3503 config->numCol = clabel->num_columns; 3504 config->numSpare = 0; /* XXX should this be set here? */ 3505 config->sectPerSU = clabel->sectPerSU; 3506 config->SUsPerPU = clabel->SUsPerPU; 3507 config->SUsPerRU = clabel->SUsPerRU; 3508 config->parityConfig = clabel->parityConfig; 3509 /* XXX... */ 3510 strcpy(config->diskQueueType,"fifo"); 3511 config->maxOutstandingDiskReqs = clabel->maxOutstanding; 3512 config->layoutSpecificSize = 0; /* XXX ?? 
*/ 3513 3514 while(ac!=NULL) { 3515 /* row/col values will be in range due to the checks 3516 in reasonable_label() */ 3517 strcpy(config->devnames[0][ac->clabel->column], 3518 ac->devname); 3519 ac = ac->next; 3520 } 3521 3522 for(i=0;i<RF_MAXDBGV;i++) { 3523 config->debugVars[i][0] = 0; 3524 } 3525 } 3526 3527 static int 3528 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value) 3529 { 3530 RF_ComponentLabel_t *clabel; 3531 int column; 3532 int sparecol; 3533 3534 raidPtr->autoconfigure = new_value; 3535 3536 for(column=0; column<raidPtr->numCol; column++) { 3537 if (raidPtr->Disks[column].status == rf_ds_optimal) { 3538 clabel = raidget_component_label(raidPtr, column); 3539 clabel->autoconfigure = new_value; 3540 raidflush_component_label(raidPtr, column); 3541 } 3542 } 3543 for(column = 0; column < raidPtr->numSpare ; column++) { 3544 sparecol = raidPtr->numCol + column; 3545 3546 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3547 clabel = raidget_component_label(raidPtr, sparecol); 3548 clabel->autoconfigure = new_value; 3549 raidflush_component_label(raidPtr, sparecol); 3550 } 3551 } 3552 return(new_value); 3553 } 3554 3555 static int 3556 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value) 3557 { 3558 RF_ComponentLabel_t *clabel; 3559 int column; 3560 int sparecol; 3561 3562 raidPtr->root_partition = new_value; 3563 for(column=0; column<raidPtr->numCol; column++) { 3564 if (raidPtr->Disks[column].status == rf_ds_optimal) { 3565 clabel = raidget_component_label(raidPtr, column); 3566 clabel->root_partition = new_value; 3567 raidflush_component_label(raidPtr, column); 3568 } 3569 } 3570 for (column = 0; column < raidPtr->numSpare ; column++) { 3571 sparecol = raidPtr->numCol + column; 3572 3573 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3574 clabel = raidget_component_label(raidPtr, sparecol); 3575 clabel->root_partition = new_value; 3576 raidflush_component_label(raidPtr, sparecol); 3577 } 3578 } 3579 return(new_value); 3580 } 3581 3582 static void 3583 rf_release_all_vps(RF_ConfigSet_t *cset) 3584 { 3585 RF_AutoConfig_t *ac; 3586 3587 ac = cset->ac; 3588 while(ac!=NULL) { 3589 /* Close the vp, and give it back */ 3590 if (ac->vp) { 3591 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); 3592 VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED); 3593 vput(ac->vp); 3594 ac->vp = NULL; 3595 } 3596 ac = ac->next; 3597 } 3598 } 3599 3600 3601 static void 3602 rf_cleanup_config_set(RF_ConfigSet_t *cset) 3603 { 3604 RF_AutoConfig_t *ac; 3605 RF_AutoConfig_t *next_ac; 3606 3607 ac = cset->ac; 3608 while(ac!=NULL) { 3609 next_ac = ac->next; 3610 /* nuke the label */ 3611 free(ac->clabel, M_RAIDFRAME); 3612 /* cleanup the config structure */ 3613 free(ac, M_RAIDFRAME); 3614 /* "next.." */ 3615 ac = next_ac; 3616 } 3617 /* and, finally, nuke the config set */ 3618 free(cset, M_RAIDFRAME); 3619 } 3620 3621 3622 void 3623 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel) 3624 { 3625 /* avoid over-writing byteswapped version. */ 3626 if (clabel->version != bswap32(RF_COMPONENT_LABEL_VERSION)) 3627 clabel->version = RF_COMPONENT_LABEL_VERSION; 3628 clabel->serial_number = raidPtr->serial_number; 3629 clabel->mod_counter = raidPtr->mod_counter; 3630 3631 clabel->num_rows = 1; 3632 clabel->num_columns = raidPtr->numCol; 3633 clabel->clean = RF_RAID_DIRTY; /* not clean */ 3634 clabel->status = rf_ds_optimal; /* "It's good!" 
 */
3635 
3636 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3637 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3638 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
3639 
3640 	clabel->blockSize = raidPtr->bytesPerSector;
3641 	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);
3642 
3643 	/* XXX not portable */
3644 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
3645 	clabel->maxOutstanding = raidPtr->maxOutstanding;
3646 	clabel->autoconfigure = raidPtr->autoconfigure;
3647 	clabel->root_partition = raidPtr->root_partition;
3648 	clabel->last_unit = raidPtr->raidid;
3649 	clabel->config_order = raidPtr->config_order;
3650 
3651 #ifndef RF_NO_PARITY_MAP
3652 	rf_paritymap_init_label(raidPtr->parity_map, clabel);
3653 #endif
3654 }
3655 
3656 static struct raid_softc *
3657 rf_auto_config_set(RF_ConfigSet_t *cset)
3658 {
3659 	RF_Raid_t *raidPtr;
3660 	RF_Config_t *config;
3661 	int raidID;
3662 	struct raid_softc *sc;
3663 
3664 #ifdef DEBUG
3665 	printf("RAID autoconfigure\n");
3666 #endif
3667 
3668 	/* 1. Create a config structure */
3669 	config = malloc(sizeof(*config), M_RAIDFRAME, M_WAITOK|M_ZERO);
3670 
3671 	/*
3672 	   2. Figure out what RAID ID this one is supposed to live at
3673 	   See if we can get the same RAID dev that it was configured
3674 	   on last time..
3675 	 */
3676 
3677 	raidID = cset->ac->clabel->last_unit;
3678 	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
3679 	     sc = raidget(++raidID, false))
3680 		continue;
3681 #ifdef DEBUG
3682 	printf("Configuring raid%d:\n",raidID);
3683 #endif
3684 
3685 	if (sc == NULL)
3686 		sc = raidget(raidID, true);
3687 	raidPtr = &sc->sc_r;
3688 
3689 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
3690 	raidPtr->softc = sc;
3691 	raidPtr->raidid = raidID;
3692 	raidPtr->openings = RAIDOUTSTANDING;
3693 
3694 	/* 3. Build the configuration structure */
3695 	rf_create_configuration(cset->ac, config, raidPtr);
3696 
3697 	/* 4. Do the configuration */
3698 	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
3699 		raidinit(sc);
3700 
3701 		rf_markalldirty(raidPtr);
3702 		raidPtr->autoconfigure = 1; /* XXX do this here? */
3703 		switch (cset->ac->clabel->root_partition) {
3704 		case 1:	/* Force Root */
3705 		case 2:	/* Soft Root: root when boot partition part of raid */
3706 			/*
3707 			 * everything configured just fine.  Make a note
3708 			 * that this set is eligible to be root,
3709 			 * or forced to be root
3710 			 */
3711 			cset->rootable = cset->ac->clabel->root_partition;
3712 			/* XXX do this here? */
3713 			raidPtr->root_partition = cset->rootable;
3714 			break;
3715 		default:
3716 			break;
3717 		}
3718 	} else {
3719 		raidput(sc);
3720 		sc = NULL;
3721 	}
3722 
3723 	/* 5. Cleanup */
3724 	free(config, M_RAIDFRAME);
3725 	return sc;
3726 }
3727 
3728 void
3729 rf_pool_init(RF_Raid_t *raidPtr, char *w_chan, struct pool *p, size_t size, const char *pool_name,
3730     size_t xmin, size_t xmax)
3731 {
3732 
3733 	/* Format: raid%d_foo */
3734 	snprintf(w_chan, RF_MAX_POOLNAMELEN, "raid%d_%s", raidPtr->raidid, pool_name);
3735 
3736 	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
3737 	pool_sethiwat(p, xmax);
3738 	pool_prime(p, xmin);
3739 }
3740 
3741 
3742 /*
3743  * rf_buf_queue_check(RF_Raid_t *raidPtr) -- looks into the buffer queue
3744  * to see if there is IO pending and if that IO could possibly be done
3745  * for a given RAID set.  Returns 0 if IO is waiting and can be done, 1
3746  * otherwise.
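 *
 * A minimal caller sketch (illustrative only; the real callers live in
 * the RAIDframe engine code, not in this file):
 *
 *	if (rf_buf_queue_check(raidPtr) == 0) {
 *		... pull the next request off the queue and issue it ...
 *	}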
3747 * 3748 */ 3749 int 3750 rf_buf_queue_check(RF_Raid_t *raidPtr) 3751 { 3752 struct raid_softc *rs; 3753 struct dk_softc *dksc; 3754 3755 rs = raidPtr->softc; 3756 dksc = &rs->sc_dksc; 3757 3758 if ((rs->sc_flags & RAIDF_INITED) == 0) 3759 return 1; 3760 3761 if (dk_strategy_pending(dksc) && raidPtr->openings > 0) { 3762 /* there is work to do */ 3763 return 0; 3764 } 3765 /* default is nothing to do */ 3766 return 1; 3767 } 3768 3769 int 3770 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr) 3771 { 3772 uint64_t numsecs; 3773 unsigned secsize; 3774 int error; 3775 3776 error = getdisksize(vp, &numsecs, &secsize); 3777 if (error == 0) { 3778 diskPtr->blockSize = secsize; 3779 diskPtr->numBlocks = numsecs - rf_protectedSectors; 3780 diskPtr->partitionSize = numsecs; 3781 return 0; 3782 } 3783 return error; 3784 } 3785 3786 static int 3787 raid_match(device_t self, cfdata_t cfdata, void *aux) 3788 { 3789 return 1; 3790 } 3791 3792 static void 3793 raid_attach(device_t parent, device_t self, void *aux) 3794 { 3795 } 3796 3797 3798 static int 3799 raid_detach(device_t self, int flags) 3800 { 3801 int error; 3802 struct raid_softc *rs = raidsoftc(self); 3803 3804 if (rs == NULL) 3805 return ENXIO; 3806 3807 if ((error = raidlock(rs)) != 0) 3808 return error; 3809 3810 error = raid_detach_unlocked(rs); 3811 3812 raidunlock(rs); 3813 3814 /* XXX raid can be referenced here */ 3815 3816 if (error) 3817 return error; 3818 3819 /* Free the softc */ 3820 raidput(rs); 3821 3822 return 0; 3823 } 3824 3825 static void 3826 rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr) 3827 { 3828 struct dk_softc *dksc = &rs->sc_dksc; 3829 struct disk_geom *dg = &dksc->sc_dkdev.dk_geom; 3830 3831 memset(dg, 0, sizeof(*dg)); 3832 3833 dg->dg_secperunit = raidPtr->totalSectors; 3834 dg->dg_secsize = raidPtr->bytesPerSector; 3835 dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe; 3836 dg->dg_ntracks = 4 * raidPtr->numCol; 3837 3838 disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL); 3839 } 3840 3841 /* 3842 * Get cache info for all the components (including spares). 3843 * Returns intersection of all the cache flags of all disks, or first 3844 * error if any encountered. 3845 * XXXfua feature flags can change as spares are added - lock down somehow 3846 */ 3847 static int 3848 rf_get_component_caches(RF_Raid_t *raidPtr, int *data) 3849 { 3850 int c; 3851 int error; 3852 int dkwhole = 0, dkpart; 3853 3854 for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) { 3855 /* 3856 * Check any non-dead disk, even when currently being 3857 * reconstructed. 3858 */ 3859 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) { 3860 error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, 3861 DIOCGCACHE, &dkpart, FREAD, NOCRED); 3862 if (error) { 3863 if (error != ENODEV) { 3864 printf("raid%d: get cache for component %s failed\n", 3865 raidPtr->raidid, 3866 raidPtr->Disks[c].devname); 3867 } 3868 3869 return error; 3870 } 3871 3872 if (c == 0) 3873 dkwhole = dkpart; 3874 else 3875 dkwhole = DKCACHE_COMBINE(dkwhole, dkpart); 3876 } 3877 } 3878 3879 *data = dkwhole; 3880 3881 return 0; 3882 } 3883 3884 /* 3885 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components. 3886 * We end up returning whatever error was returned by the first cache flush 3887 * that fails. 
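 *
 * For example, on a two-component set a DIOCCACHESYNC on the raid
 * device fans out roughly as:
 *
 *	rf_sync_component_cache(component 0)	(up to 5 attempts)
 *	rf_sync_component_cache(component 1)	(up to 5 attempts)
 *
 * with each component retried on failure (ENODEV excepted) and the
 * first error remembered and returned.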
3888 */ 3889 3890 static int 3891 rf_sync_component_cache(RF_Raid_t *raidPtr, int c, int force) 3892 { 3893 int e = 0; 3894 for (int i = 0; i < 5; i++) { 3895 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC, 3896 &force, FWRITE, NOCRED); 3897 if (!e || e == ENODEV) 3898 return e; 3899 printf("raid%d: cache flush[%d] to component %s failed (%d)\n", 3900 raidPtr->raidid, i, raidPtr->Disks[c].devname, e); 3901 } 3902 return e; 3903 } 3904 3905 int 3906 rf_sync_component_caches(RF_Raid_t *raidPtr, int force) 3907 { 3908 int c, error; 3909 3910 error = 0; 3911 for (c = 0; c < raidPtr->numCol; c++) { 3912 if (raidPtr->Disks[c].status == rf_ds_optimal) { 3913 int e = rf_sync_component_cache(raidPtr, c, force); 3914 if (e && !error) 3915 error = e; 3916 } 3917 } 3918 3919 for (c = 0; c < raidPtr->numSpare ; c++) { 3920 int sparecol = raidPtr->numCol + c; 3921 3922 /* Need to ensure that the reconstruct actually completed! */ 3923 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3924 int e = rf_sync_component_cache(raidPtr, sparecol, 3925 force); 3926 if (e && !error) 3927 error = e; 3928 } 3929 } 3930 return error; 3931 } 3932 3933 /* Fill in info with the current status */ 3934 void 3935 rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info) 3936 { 3937 3938 memset(info, 0, sizeof(*info)); 3939 3940 if (raidPtr->status != rf_rs_reconstructing) { 3941 info->total = 100; 3942 info->completed = 100; 3943 } else { 3944 info->total = raidPtr->reconControl->numRUsTotal; 3945 info->completed = raidPtr->reconControl->numRUsComplete; 3946 } 3947 info->remaining = info->total - info->completed; 3948 } 3949 3950 /* Fill in info with the current status */ 3951 void 3952 rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info) 3953 { 3954 3955 memset(info, 0, sizeof(*info)); 3956 3957 if (raidPtr->parity_rewrite_in_progress == 1) { 3958 info->total = raidPtr->Layout.numStripe; 3959 info->completed = raidPtr->parity_rewrite_stripes_done; 3960 } else { 3961 info->completed = 100; 3962 info->total = 100; 3963 } 3964 info->remaining = info->total - info->completed; 3965 } 3966 3967 /* Fill in info with the current status */ 3968 void 3969 rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info) 3970 { 3971 3972 memset(info, 0, sizeof(*info)); 3973 info->remaining = 0; 3974 info->completed = 100; 3975 info->total = 100; 3976 } 3977 3978 /* Fill in config with the current info */ 3979 int 3980 rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config) 3981 { 3982 int d, i, j; 3983 3984 if (!raidPtr->valid) 3985 return ENODEV; 3986 config->cols = raidPtr->numCol; 3987 config->ndevs = raidPtr->numCol; 3988 if (config->ndevs >= RF_MAX_DISKS) 3989 return ENOMEM; 3990 config->nspares = raidPtr->numSpare; 3991 if (config->nspares >= RF_MAX_DISKS) 3992 return ENOMEM; 3993 config->maxqdepth = raidPtr->maxQueueDepth; 3994 d = 0; 3995 for (j = 0; j < config->cols; j++) { 3996 config->devs[d] = raidPtr->Disks[j]; 3997 d++; 3998 } 3999 for (i = 0; i < config->nspares; i++) { 4000 config->spares[i] = raidPtr->Disks[raidPtr->numCol + i]; 4001 if (config->spares[i].status == rf_ds_rebuilding_spare) { 4002 /* raidctl(8) expects to see this as a used spare */ 4003 config->spares[i].status = rf_ds_used_spare; 4004 } 4005 } 4006 return 0; 4007 } 4008 4009 int 4010 rf_get_component_label(RF_Raid_t *raidPtr, void *data) 4011 { 4012 RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data; 4013 RF_ComponentLabel_t *raid_clabel; 4014 int column = clabel->column; 
4015 4016 if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare)) 4017 return EINVAL; 4018 raid_clabel = raidget_component_label(raidPtr, column); 4019 memcpy(clabel, raid_clabel, sizeof *clabel); 4020 /* Fix-up for userland. */ 4021 if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) 4022 clabel->version = RF_COMPONENT_LABEL_VERSION; 4023 4024 return 0; 4025 } 4026 4027 /* 4028 * Module interface 4029 */ 4030 4031 MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs"); 4032 4033 #ifdef _MODULE 4034 CFDRIVER_DECL(raid, DV_DISK, NULL); 4035 #endif 4036 4037 static int raid_modcmd(modcmd_t, void *); 4038 static int raid_modcmd_init(void); 4039 static int raid_modcmd_fini(void); 4040 4041 static int 4042 raid_modcmd(modcmd_t cmd, void *data) 4043 { 4044 int error; 4045 4046 error = 0; 4047 switch (cmd) { 4048 case MODULE_CMD_INIT: 4049 error = raid_modcmd_init(); 4050 break; 4051 case MODULE_CMD_FINI: 4052 error = raid_modcmd_fini(); 4053 break; 4054 default: 4055 error = ENOTTY; 4056 break; 4057 } 4058 return error; 4059 } 4060 4061 static int 4062 raid_modcmd_init(void) 4063 { 4064 int error; 4065 int bmajor, cmajor; 4066 4067 mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE); 4068 mutex_enter(&raid_lock); 4069 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 4070 rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM); 4071 rf_init_cond2(rf_sparet_wait_cv, "sparetw"); 4072 rf_init_cond2(rf_sparet_resp_cv, "rfgst"); 4073 4074 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; 4075 #endif 4076 4077 bmajor = cmajor = -1; 4078 error = devsw_attach("raid", &raid_bdevsw, &bmajor, 4079 &raid_cdevsw, &cmajor); 4080 if (error != 0 && error != EEXIST) { 4081 aprint_error("%s: devsw_attach failed %d\n", __func__, error); 4082 mutex_exit(&raid_lock); 4083 return error; 4084 } 4085 #ifdef _MODULE 4086 error = config_cfdriver_attach(&raid_cd); 4087 if (error != 0) { 4088 aprint_error("%s: config_cfdriver_attach failed %d\n", 4089 __func__, error); 4090 devsw_detach(&raid_bdevsw, &raid_cdevsw); 4091 mutex_exit(&raid_lock); 4092 return error; 4093 } 4094 #endif 4095 error = config_cfattach_attach(raid_cd.cd_name, &raid_ca); 4096 if (error != 0) { 4097 aprint_error("%s: config_cfattach_attach failed %d\n", 4098 __func__, error); 4099 #ifdef _MODULE 4100 config_cfdriver_detach(&raid_cd); 4101 #endif 4102 devsw_detach(&raid_bdevsw, &raid_cdevsw); 4103 mutex_exit(&raid_lock); 4104 return error; 4105 } 4106 4107 raidautoconfigdone = false; 4108 4109 mutex_exit(&raid_lock); 4110 4111 if (error == 0) { 4112 if (rf_BootRaidframe(true) == 0) 4113 aprint_verbose("Kernelized RAIDframe activated\n"); 4114 else 4115 panic("Serious error activating RAID!!"); 4116 } 4117 4118 /* 4119 * Register a finalizer which will be used to auto-config RAID 4120 * sets once all real hardware devices have been found. 4121 */ 4122 error = config_finalize_register(NULL, rf_autoconfig); 4123 if (error != 0) { 4124 aprint_error("WARNING: unable to register RAIDframe " 4125 "finalizer\n"); 4126 error = 0; 4127 } 4128 4129 return error; 4130 } 4131 4132 static int 4133 raid_modcmd_fini(void) 4134 { 4135 int error; 4136 4137 mutex_enter(&raid_lock); 4138 4139 /* Don't allow unload if raid device(s) exist. 
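   (a configured set still owns device nodes and kernel threads, which
   would be left dangling if the driver were torn down underneath it)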
*/ 4140 if (!LIST_EMPTY(&raids)) { 4141 mutex_exit(&raid_lock); 4142 return EBUSY; 4143 } 4144 4145 error = config_cfattach_detach(raid_cd.cd_name, &raid_ca); 4146 if (error != 0) { 4147 aprint_error("%s: cannot detach cfattach\n",__func__); 4148 mutex_exit(&raid_lock); 4149 return error; 4150 } 4151 #ifdef _MODULE 4152 error = config_cfdriver_detach(&raid_cd); 4153 if (error != 0) { 4154 aprint_error("%s: cannot detach cfdriver\n",__func__); 4155 config_cfattach_attach(raid_cd.cd_name, &raid_ca); 4156 mutex_exit(&raid_lock); 4157 return error; 4158 } 4159 #endif 4160 devsw_detach(&raid_bdevsw, &raid_cdevsw); 4161 rf_BootRaidframe(false); 4162 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 4163 rf_destroy_mutex2(rf_sparet_wait_mutex); 4164 rf_destroy_cond2(rf_sparet_wait_cv); 4165 rf_destroy_cond2(rf_sparet_resp_cv); 4166 #endif 4167 mutex_exit(&raid_lock); 4168 mutex_destroy(&raid_lock); 4169 4170 return error; 4171 } 4172
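/*
 * Illustrative module usage (a sketch; RAIDframe is normally compiled
 * into the kernel rather than loaded as a module):
 *
 *	# modload raid
 *	# raidctl -c /etc/raid0.conf raid0
 *	...
 *	# modunload raid	(fails with EBUSY while any raid unit
 *				 remains configured, per raid_modcmd_fini()
 *				 above)
 */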