1 /* $NetBSD: rf_netbsdkintf.c,v 1.326 2015/12/08 20:36:15 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Greg Oster; Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 
40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 66 * 67 * @(#)cd.c 8.2 (Berkeley) 11/16/93 68 */ 69 70 /* 71 * Copyright (c) 1995 Carnegie-Mellon University. 72 * All rights reserved. 
73 * 74 * Authors: Mark Holland, Jim Zelenka 75 * 76 * Permission to use, copy, modify and distribute this software and 77 * its documentation is hereby granted, provided that both the copyright 78 * notice and this permission notice appear in all copies of the 79 * software, derivative works or modified versions, and any portions 80 * thereof, and that both notices appear in supporting documentation. 81 * 82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 85 * 86 * Carnegie Mellon requests users of this software to return to 87 * 88 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 89 * School of Computer Science 90 * Carnegie Mellon University 91 * Pittsburgh PA 15213-3890 92 * 93 * any improvements or extensions that they make and grant Carnegie the 94 * rights to redistribute these changes. 95 */ 96 97 /*********************************************************** 98 * 99 * rf_kintf.c -- the kernel interface routines for RAIDframe 100 * 101 ***********************************************************/ 102 103 #include <sys/cdefs.h> 104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.326 2015/12/08 20:36:15 christos Exp $"); 105 106 #ifdef _KERNEL_OPT 107 #include "opt_compat_netbsd.h" 108 #include "opt_raid_autoconfig.h" 109 #endif 110 111 #include <sys/param.h> 112 #include <sys/errno.h> 113 #include <sys/pool.h> 114 #include <sys/proc.h> 115 #include <sys/queue.h> 116 #include <sys/disk.h> 117 #include <sys/device.h> 118 #include <sys/stat.h> 119 #include <sys/ioctl.h> 120 #include <sys/fcntl.h> 121 #include <sys/systm.h> 122 #include <sys/vnode.h> 123 #include <sys/disklabel.h> 124 #include <sys/conf.h> 125 #include <sys/buf.h> 126 #include <sys/bufq.h> 127 #include <sys/reboot.h> 128 #include <sys/kauth.h> 129 130 #include <prop/proplib.h> 131 132 #include 
<dev/raidframe/raidframevar.h> 133 #include <dev/raidframe/raidframeio.h> 134 #include <dev/raidframe/rf_paritymap.h> 135 136 #include "rf_raid.h" 137 #include "rf_copyback.h" 138 #include "rf_dag.h" 139 #include "rf_dagflags.h" 140 #include "rf_desc.h" 141 #include "rf_diskqueue.h" 142 #include "rf_etimer.h" 143 #include "rf_general.h" 144 #include "rf_kintf.h" 145 #include "rf_options.h" 146 #include "rf_driver.h" 147 #include "rf_parityscan.h" 148 #include "rf_threadstuff.h" 149 150 #ifdef COMPAT_50 151 #include "rf_compat50.h" 152 #endif 153 154 #include "ioconf.h" 155 156 #ifdef DEBUG 157 int rf_kdebug_level = 0; 158 #define db1_printf(a) if (rf_kdebug_level > 0) printf a 159 #else /* DEBUG */ 160 #define db1_printf(a) { } 161 #endif /* DEBUG */ 162 163 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 164 static rf_declare_mutex2(rf_sparet_wait_mutex); 165 static rf_declare_cond2(rf_sparet_wait_cv); 166 static rf_declare_cond2(rf_sparet_resp_cv); 167 168 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a 169 * spare table */ 170 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from 171 * installation process */ 172 #endif 173 174 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures"); 175 176 /* prototypes */ 177 static void KernelWakeupFunc(struct buf *); 178 static void InitBP(struct buf *, struct vnode *, unsigned, 179 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *), 180 void *, int, struct proc *); 181 struct raid_softc; 182 static void raidinit(struct raid_softc *); 183 184 static int raid_match(device_t, cfdata_t, void *); 185 static void raid_attach(device_t, device_t, void *); 186 static int raid_detach(device_t, int); 187 188 static int raidread_component_area(dev_t, struct vnode *, void *, size_t, 189 daddr_t, daddr_t); 190 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t, 191 daddr_t, daddr_t, int); 192 193 static int raidwrite_component_label(unsigned, 194 dev_t, 
struct vnode *, RF_ComponentLabel_t *); 195 static int raidread_component_label(unsigned, 196 dev_t, struct vnode *, RF_ComponentLabel_t *); 197 198 199 static dev_type_open(raidopen); 200 static dev_type_close(raidclose); 201 static dev_type_read(raidread); 202 static dev_type_write(raidwrite); 203 static dev_type_ioctl(raidioctl); 204 static dev_type_strategy(raidstrategy); 205 static dev_type_dump(raiddump); 206 static dev_type_size(raidsize); 207 208 const struct bdevsw raid_bdevsw = { 209 .d_open = raidopen, 210 .d_close = raidclose, 211 .d_strategy = raidstrategy, 212 .d_ioctl = raidioctl, 213 .d_dump = raiddump, 214 .d_psize = raidsize, 215 .d_discard = nodiscard, 216 .d_flag = D_DISK 217 }; 218 219 const struct cdevsw raid_cdevsw = { 220 .d_open = raidopen, 221 .d_close = raidclose, 222 .d_read = raidread, 223 .d_write = raidwrite, 224 .d_ioctl = raidioctl, 225 .d_stop = nostop, 226 .d_tty = notty, 227 .d_poll = nopoll, 228 .d_mmap = nommap, 229 .d_kqfilter = nokqfilter, 230 .d_discard = nodiscard, 231 .d_flag = D_DISK 232 }; 233 234 static struct dkdriver rf_dkdriver = { 235 .d_strategy = raidstrategy, 236 .d_minphys = minphys 237 }; 238 239 struct raid_softc { 240 device_t sc_dev; 241 int sc_unit; 242 int sc_flags; /* flags */ 243 int sc_cflags; /* configuration flags */ 244 uint64_t sc_size; /* size of the raid device */ 245 char sc_xname[20]; /* XXX external name */ 246 struct disk sc_dkdev; /* generic disk device info */ 247 struct bufq_state *buf_queue; /* used for the device queue */ 248 RF_Raid_t sc_r; 249 LIST_ENTRY(raid_softc) sc_link; 250 }; 251 /* sc_flags */ 252 #define RAIDF_INITED 0x01 /* unit has been initialized */ 253 #define RAIDF_WLABEL 0x02 /* label area is writable */ 254 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */ 255 #define RAIDF_SHUTDOWN 0x08 /* unit is being shutdown */ 256 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */ 257 #define RAIDF_LOCKED 0x80 /* unit is locked */ 258 259 #define 
raidunit(x) DISKUNIT(x) 260 261 extern struct cfdriver raid_cd; 262 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc), 263 raid_match, raid_attach, raid_detach, NULL, NULL, NULL, 264 DVF_DETACH_SHUTDOWN); 265 266 /* 267 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device. 268 * Be aware that large numbers can allow the driver to consume a lot of 269 * kernel memory, especially on writes, and in degraded mode reads. 270 * 271 * For example: with a stripe width of 64 blocks (32k) and 5 disks, 272 * a single 64K write will typically require 64K for the old data, 273 * 64K for the old parity, and 64K for the new parity, for a total 274 * of 192K (if the parity buffer is not re-used immediately). 275 * Even it if is used immediately, that's still 128K, which when multiplied 276 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data. 277 * 278 * Now in degraded mode, for example, a 64K read on the above setup may 279 * require data reconstruction, which will require *all* of the 4 remaining 280 * disks to participate -- 4 * 32K/disk == 128K again. 281 */ 282 283 #ifndef RAIDOUTSTANDING 284 #define RAIDOUTSTANDING 6 285 #endif 286 287 #define RAIDLABELDEV(dev) \ 288 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART)) 289 290 /* declared here, and made public, for the benefit of KVM stuff.. 
*/ 291 292 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *, 293 struct disklabel *); 294 static void raidgetdisklabel(dev_t); 295 static void raidmakedisklabel(struct raid_softc *); 296 297 static int raidlock(struct raid_softc *); 298 static void raidunlock(struct raid_softc *); 299 300 static int raid_detach_unlocked(struct raid_softc *); 301 302 static void rf_markalldirty(RF_Raid_t *); 303 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *); 304 305 void rf_ReconThread(struct rf_recon_req *); 306 void rf_RewriteParityThread(RF_Raid_t *raidPtr); 307 void rf_CopybackThread(RF_Raid_t *raidPtr); 308 void rf_ReconstructInPlaceThread(struct rf_recon_req *); 309 int rf_autoconfig(device_t); 310 void rf_buildroothack(RF_ConfigSet_t *); 311 312 RF_AutoConfig_t *rf_find_raid_components(void); 313 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *); 314 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *); 315 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t); 316 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *); 317 int rf_set_autoconfig(RF_Raid_t *, int); 318 int rf_set_rootpartition(RF_Raid_t *, int); 319 void rf_release_all_vps(RF_ConfigSet_t *); 320 void rf_cleanup_config_set(RF_ConfigSet_t *); 321 int rf_have_enough_components(RF_ConfigSet_t *); 322 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *); 323 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t); 324 325 /* 326 * Debugging, mostly. Set to 0 to not allow autoconfig to take place. 327 * Note that this is overridden by having RAID_AUTOCONFIG as an option 328 * in the kernel config file. 
329 */ 330 #ifdef RAID_AUTOCONFIG 331 int raidautoconfig = 1; 332 #else 333 int raidautoconfig = 0; 334 #endif 335 static bool raidautoconfigdone = false; 336 337 struct RF_Pools_s rf_pools; 338 339 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids); 340 static kmutex_t raid_lock; 341 342 static struct raid_softc * 343 raidcreate(int unit) { 344 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP); 345 if (sc == NULL) { 346 #ifdef DIAGNOSTIC 347 printf("%s: out of memory\n", __func__); 348 #endif 349 return NULL; 350 } 351 sc->sc_unit = unit; 352 bufq_alloc(&sc->buf_queue, "fcfs", BUFQ_SORT_RAWBLOCK); 353 return sc; 354 } 355 356 static void 357 raiddestroy(struct raid_softc *sc) { 358 bufq_free(sc->buf_queue); 359 kmem_free(sc, sizeof(*sc)); 360 } 361 362 static struct raid_softc * 363 raidget(int unit) { 364 struct raid_softc *sc; 365 if (unit < 0) { 366 #ifdef DIAGNOSTIC 367 panic("%s: unit %d!", __func__, unit); 368 #endif 369 return NULL; 370 } 371 mutex_enter(&raid_lock); 372 LIST_FOREACH(sc, &raids, sc_link) { 373 if (sc->sc_unit == unit) { 374 mutex_exit(&raid_lock); 375 return sc; 376 } 377 } 378 mutex_exit(&raid_lock); 379 if ((sc = raidcreate(unit)) == NULL) 380 return NULL; 381 mutex_enter(&raid_lock); 382 LIST_INSERT_HEAD(&raids, sc, sc_link); 383 mutex_exit(&raid_lock); 384 return sc; 385 } 386 387 static void 388 raidput(struct raid_softc *sc) { 389 mutex_enter(&raid_lock); 390 LIST_REMOVE(sc, sc_link); 391 mutex_exit(&raid_lock); 392 raiddestroy(sc); 393 } 394 395 void 396 raidattach(int num) 397 { 398 mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE); 399 /* This is where all the initialization stuff gets done. 
 */

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	/* Spare-table installation plumbing (declustered parity only). */
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* Boot the RAIDframe core; failure here is unrecoverable. */
	if (rf_BootRaidframe() == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}

/*
 * Autoconfiguration finalizer: runs after all real hardware has
 * attached, locates RAID components and configures complete sets.
 * Guarded so the body executes at most once.  Returns non-zero when
 * work was done (finalizer protocol).
 */
int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	/* Disabled by configuration, or already ran: nothing to do. */
	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
460 */ 461 rf_buildroothack(config_sets); 462 463 return 1; 464 } 465 466 static int 467 rf_containsboot(RF_Raid_t *r, device_t bdv) { 468 const char *bootname = device_xname(bdv); 469 size_t len = strlen(bootname); 470 471 for (int col = 0; col < r->numCol; col++) { 472 const char *devname = r->Disks[col].devname; 473 devname += sizeof("/dev/") - 1; 474 if (strncmp(devname, "dk", 2) == 0) { 475 const char *parent = 476 dkwedge_get_parent_name(r->Disks[col].dev); 477 if (parent != NULL) 478 devname = parent; 479 } 480 if (strncmp(devname, bootname, len) == 0) { 481 struct raid_softc *sc = r->softc; 482 aprint_debug("raid%d includes boot device %s\n", 483 sc->sc_unit, devname); 484 return 1; 485 } 486 } 487 return 0; 488 } 489 490 void 491 rf_buildroothack(RF_ConfigSet_t *config_sets) 492 { 493 RF_ConfigSet_t *cset; 494 RF_ConfigSet_t *next_cset; 495 int num_root; 496 struct raid_softc *sc, *rsc; 497 498 sc = rsc = NULL; 499 num_root = 0; 500 cset = config_sets; 501 while (cset != NULL) { 502 next_cset = cset->next; 503 if (rf_have_enough_components(cset) && 504 cset->ac->clabel->autoconfigure == 1) { 505 sc = rf_auto_config_set(cset); 506 if (sc != NULL) { 507 aprint_debug("raid%d: configured ok\n", 508 sc->sc_unit); 509 if (cset->rootable) { 510 rsc = sc; 511 num_root++; 512 } 513 } else { 514 /* The autoconfig didn't work :( */ 515 aprint_debug("Autoconfig failed\n"); 516 rf_release_all_vps(cset); 517 } 518 } else { 519 /* we're not autoconfiguring this set... 520 release the associated resources */ 521 rf_release_all_vps(cset); 522 } 523 /* cleanup */ 524 rf_cleanup_config_set(cset); 525 cset = next_cset; 526 } 527 528 /* if the user has specified what the root device should be 529 then we don't touch booted_device or boothowto... */ 530 531 if (rootspec != NULL) 532 return; 533 534 /* we found something bootable... */ 535 536 /* 537 * XXX: The following code assumes that the root raid 538 * is the first ('a') partition. 
This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		/* If the set carries wedges, root is the wedge "<dev>a". */
		if (rsc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume 'a' */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(rsc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
		} else
			candidate_root = rsc->sc_dev;
		/*
		 * Override root when nothing else was booted, when the
		 * set's label forces root (root_partition == 1), or when
		 * the set contains the device we actually booted from.
		 */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_partition = 0;	/* XXX assume 'a' */
		}
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/*
		 * Several rootable sets: re-count, keeping only the
		 * valid ones that actually contain the boot device.
		 */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = rsc->sc_dev;
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer...
 */
			boothowto |= RB_ASKNAME;
		}
	}
}

/*
 * d_psize entry point: return the size, in DEV_BSIZE blocks, of the
 * swap partition addressed by 'dev', or -1 if the unit is not
 * configured or the partition is not of type FS_SWAP.  Temporarily
 * opens the device if no partition was already open.
 */
static int
raidsize(dev_t dev)
{
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, unit, omask, size;

	unit = raidunit(dev);
	if ((rs = raidget(unit)) == NULL)
		return -1;
	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = rs->sc_dkdev.dk_openmask & (1 << part);
	lp = rs->sc_dkdev.dk_label;

	/* Open the partition ourselves if nobody else has it open. */
	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
		return (-1);

	return (size);

}

/*
 * d_dump entry point: write a crash dump to the RAID set.  Only
 * RAID 1 sets (1 data + 1 parity column) are supported; the dump is
 * written raw to a single live component, bypassing RAIDframe.
 */
static int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int     part, c, sparecol, j, scol, dumpto;
	int     error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	raidPtr = &rs->sc_r;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	/* The dump must be a whole number of DEV_BSIZE blocks. */
	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* Refuse dumps that would run off the end of the set. */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive..
 */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			/* Which original column does this spare replace? */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	 */

	/* Dump raw through the component's own block device. */
	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}

/*
 * d_open entry point: open a partition of the RAID device.  Fails if
 * the unit is shutting down, if wedges exist and a non-raw partition
 * is requested, or if the requested partition does not exist.  The
 * first open of a configured set marks all components dirty.
 */
/* ARGSUSED */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* Read the disklabel on the very first open of the unit. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_nwedges == 0) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.
I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}

/*
 * d_close entry point: close a partition.  When the last partition of
 * a configured set is closed, write final ("clean") component labels.
 * Always succeeds once the unit lock is obtained.
 */
/* ARGSUSED */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	int     error = 0;
	int     part;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(&rs->sc_r,
		    RF_FINAL_COMPONENT_UPDATE);

		/* If the kernel is shutting down, it will detach
		 * this RAID set soon enough.
 */
	}

	raidunlock(rs);
	return (0);

}

/*
 * d_strategy entry point: validate and bounds-check the buffer, then
 * queue it for the RAIDframe I/O thread.  Errors are reported by
 * setting bp->b_error and calling biodone() here; queued buffers are
 * completed later by the I/O thread.
 */
static void
raidstrategy(struct buf *bp)
{
	unsigned int unit = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	int     wlabel;
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}
	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		goto done;
	}
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) == RAW_PART) {
		uint64_t size; /* device size in DEV_BSIZE unit */

		/* Convert totalSectors to DEV_BSIZE units either way. */
		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
			size = raidPtr->totalSectors <<
			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
		} else {
			size = raidPtr->totalSectors >>
			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
		}
		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
			goto done;
		}
	} else {
		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
				(int) bp->b_blkno, (int) wlabel));
			goto done;
		}
	}

	rf_lock_mutex2(raidPtr->iodone_lock);

	bp->b_resid = 0;

	/* stuff it onto our queue */
	bufq_put(rs->buf_queue, bp);

	/* schedule the I/O to happen at the next convenient time */
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);

	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}

/*
 * d_read entry point: raw character-device read via physio().
 */
/* ARGSUSED */
static int
raidread(dev_t dev, struct uio *uio, int flags)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);

	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));

}

/*
 * d_write entry point: raw character-device write via physio().
 */
/* ARGSUSED */
static int
raidwrite(dev_t dev, struct uio *uio, int flags)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);

	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));

}

/*
 * Tear down a RAID set: shut down the RAIDframe engine (unless the
 * unit was never initialized) and detach the disk(9) structures.
 * Caller must hold the unit lock.  Returns EBUSY while any partition
 * is still open.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk.
*/ 1035 dkwedge_delall(&rs->sc_dkdev); 1036 disk_detach(&rs->sc_dkdev); 1037 disk_destroy(&rs->sc_dkdev); 1038 1039 aprint_normal_dev(rs->sc_dev, "detached\n"); 1040 1041 return 0; 1042 } 1043 1044 static int 1045 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 1046 { 1047 int unit = raidunit(dev); 1048 int error = 0; 1049 int part, pmask, s; 1050 cfdata_t cf; 1051 struct raid_softc *rs; 1052 RF_Config_t *k_cfg, *u_cfg; 1053 RF_Raid_t *raidPtr; 1054 RF_RaidDisk_t *diskPtr; 1055 RF_AccTotals_t *totals; 1056 RF_DeviceConfig_t *d_cfg, **ucfgp; 1057 u_char *specific_buf; 1058 int retcode = 0; 1059 int column; 1060 /* int raidid; */ 1061 struct rf_recon_req *rrcopy, *rr; 1062 RF_ComponentLabel_t *clabel; 1063 RF_ComponentLabel_t *ci_label; 1064 RF_ComponentLabel_t **clabel_ptr; 1065 RF_SingleComponent_t *sparePtr,*componentPtr; 1066 RF_SingleComponent_t component; 1067 RF_ProgressInfo_t progressInfo, **progressInfoPtr; 1068 int i, j, d; 1069 #ifdef __HAVE_OLD_DISKLABEL 1070 struct disklabel newlabel; 1071 #endif 1072 1073 if ((rs = raidget(unit)) == NULL) 1074 return ENXIO; 1075 raidPtr = &rs->sc_r; 1076 1077 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev, 1078 (int) DISKPART(dev), (int) unit, cmd)); 1079 1080 /* Must be open for writes for these commands... */ 1081 switch (cmd) { 1082 #ifdef DIOCGSECTORSIZE 1083 case DIOCGSECTORSIZE: 1084 *(u_int *)data = raidPtr->bytesPerSector; 1085 return 0; 1086 case DIOCGMEDIASIZE: 1087 *(off_t *)data = 1088 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector; 1089 return 0; 1090 #endif 1091 case DIOCSDINFO: 1092 case DIOCWDINFO: 1093 #ifdef __HAVE_OLD_DISKLABEL 1094 case ODIOCWDINFO: 1095 case ODIOCSDINFO: 1096 #endif 1097 case DIOCWLABEL: 1098 case DIOCAWEDGE: 1099 case DIOCDWEDGE: 1100 case DIOCMWEDGES: 1101 case DIOCSSTRATEGY: 1102 if ((flag & FWRITE) == 0) 1103 return (EBADF); 1104 } 1105 1106 /* Must be initialized for these... 
*/ 1107 switch (cmd) { 1108 case DIOCGDINFO: 1109 case DIOCSDINFO: 1110 case DIOCWDINFO: 1111 #ifdef __HAVE_OLD_DISKLABEL 1112 case ODIOCGDINFO: 1113 case ODIOCWDINFO: 1114 case ODIOCSDINFO: 1115 case ODIOCGDEFLABEL: 1116 #endif 1117 case DIOCGPARTINFO: 1118 case DIOCWLABEL: 1119 case DIOCGDEFLABEL: 1120 case DIOCAWEDGE: 1121 case DIOCDWEDGE: 1122 case DIOCLWEDGES: 1123 case DIOCMWEDGES: 1124 case DIOCCACHESYNC: 1125 case RAIDFRAME_SHUTDOWN: 1126 case RAIDFRAME_REWRITEPARITY: 1127 case RAIDFRAME_GET_INFO: 1128 case RAIDFRAME_RESET_ACCTOTALS: 1129 case RAIDFRAME_GET_ACCTOTALS: 1130 case RAIDFRAME_KEEP_ACCTOTALS: 1131 case RAIDFRAME_GET_SIZE: 1132 case RAIDFRAME_FAIL_DISK: 1133 case RAIDFRAME_COPYBACK: 1134 case RAIDFRAME_CHECK_RECON_STATUS: 1135 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1136 case RAIDFRAME_GET_COMPONENT_LABEL: 1137 case RAIDFRAME_SET_COMPONENT_LABEL: 1138 case RAIDFRAME_ADD_HOT_SPARE: 1139 case RAIDFRAME_REMOVE_HOT_SPARE: 1140 case RAIDFRAME_INIT_LABELS: 1141 case RAIDFRAME_REBUILD_IN_PLACE: 1142 case RAIDFRAME_CHECK_PARITY: 1143 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1144 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1145 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1146 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1147 case RAIDFRAME_SET_AUTOCONFIG: 1148 case RAIDFRAME_SET_ROOT: 1149 case RAIDFRAME_DELETE_COMPONENT: 1150 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1151 case RAIDFRAME_PARITYMAP_STATUS: 1152 case RAIDFRAME_PARITYMAP_GET_DISABLE: 1153 case RAIDFRAME_PARITYMAP_SET_DISABLE: 1154 case RAIDFRAME_PARITYMAP_SET_PARAMS: 1155 case DIOCGSTRATEGY: 1156 case DIOCSSTRATEGY: 1157 if ((rs->sc_flags & RAIDF_INITED) == 0) 1158 return (ENXIO); 1159 } 1160 1161 switch (cmd) { 1162 #ifdef COMPAT_50 1163 case RAIDFRAME_GET_INFO50: 1164 return rf_get_info50(raidPtr, data); 1165 1166 case RAIDFRAME_CONFIGURE50: 1167 if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0) 1168 return retcode; 1169 goto config; 1170 #endif 1171 /* configure the system */ 1172 case 
RAIDFRAME_CONFIGURE: 1173 1174 if (raidPtr->valid) { 1175 /* There is a valid RAID set running on this unit! */ 1176 printf("raid%d: Device already configured!\n",unit); 1177 return(EINVAL); 1178 } 1179 1180 /* copy-in the configuration information */ 1181 /* data points to a pointer to the configuration structure */ 1182 1183 u_cfg = *((RF_Config_t **) data); 1184 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *)); 1185 if (k_cfg == NULL) { 1186 return (ENOMEM); 1187 } 1188 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t)); 1189 if (retcode) { 1190 RF_Free(k_cfg, sizeof(RF_Config_t)); 1191 db1_printf(("rf_ioctl: retcode=%d copyin.1\n", 1192 retcode)); 1193 return (retcode); 1194 } 1195 goto config; 1196 config: 1197 /* allocate a buffer for the layout-specific data, and copy it 1198 * in */ 1199 if (k_cfg->layoutSpecificSize) { 1200 if (k_cfg->layoutSpecificSize > 10000) { 1201 /* sanity check */ 1202 RF_Free(k_cfg, sizeof(RF_Config_t)); 1203 return (EINVAL); 1204 } 1205 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, 1206 (u_char *)); 1207 if (specific_buf == NULL) { 1208 RF_Free(k_cfg, sizeof(RF_Config_t)); 1209 return (ENOMEM); 1210 } 1211 retcode = copyin(k_cfg->layoutSpecific, specific_buf, 1212 k_cfg->layoutSpecificSize); 1213 if (retcode) { 1214 RF_Free(k_cfg, sizeof(RF_Config_t)); 1215 RF_Free(specific_buf, 1216 k_cfg->layoutSpecificSize); 1217 db1_printf(("rf_ioctl: retcode=%d copyin.2\n", 1218 retcode)); 1219 return (retcode); 1220 } 1221 } else 1222 specific_buf = NULL; 1223 k_cfg->layoutSpecific = specific_buf; 1224 1225 /* should do some kind of sanity check on the configuration. 1226 * Store the sum of all the bytes in the last byte? 
*/ 1227 1228 /* configure the system */ 1229 1230 /* 1231 * Clear the entire RAID descriptor, just to make sure 1232 * there is no stale data left in the case of a 1233 * reconfiguration 1234 */ 1235 memset(raidPtr, 0, sizeof(*raidPtr)); 1236 raidPtr->softc = rs; 1237 raidPtr->raidid = unit; 1238 1239 retcode = rf_Configure(raidPtr, k_cfg, NULL); 1240 1241 if (retcode == 0) { 1242 1243 /* allow this many simultaneous IO's to 1244 this RAID device */ 1245 raidPtr->openings = RAIDOUTSTANDING; 1246 1247 raidinit(rs); 1248 rf_markalldirty(raidPtr); 1249 } 1250 /* free the buffers. No return code here. */ 1251 if (k_cfg->layoutSpecificSize) { 1252 RF_Free(specific_buf, k_cfg->layoutSpecificSize); 1253 } 1254 RF_Free(k_cfg, sizeof(RF_Config_t)); 1255 1256 return (retcode); 1257 1258 /* shutdown the system */ 1259 case RAIDFRAME_SHUTDOWN: 1260 1261 part = DISKPART(dev); 1262 pmask = (1 << part); 1263 1264 if ((error = raidlock(rs)) != 0) 1265 return (error); 1266 1267 if ((rs->sc_dkdev.dk_openmask & ~pmask) || 1268 ((rs->sc_dkdev.dk_bopenmask & pmask) && 1269 (rs->sc_dkdev.dk_copenmask & pmask))) 1270 retcode = EBUSY; 1271 else { 1272 rs->sc_flags |= RAIDF_SHUTDOWN; 1273 rs->sc_dkdev.dk_copenmask &= ~pmask; 1274 rs->sc_dkdev.dk_bopenmask &= ~pmask; 1275 rs->sc_dkdev.dk_openmask &= ~pmask; 1276 retcode = 0; 1277 } 1278 1279 raidunlock(rs); 1280 1281 if (retcode != 0) 1282 return retcode; 1283 1284 /* free the pseudo device attach bits */ 1285 1286 cf = device_cfdata(rs->sc_dev); 1287 if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0) 1288 free(cf, M_RAIDFRAME); 1289 1290 return (retcode); 1291 case RAIDFRAME_GET_COMPONENT_LABEL: 1292 clabel_ptr = (RF_ComponentLabel_t **) data; 1293 /* need to read the component label for the disk indicated 1294 by row,column in clabel */ 1295 1296 /* 1297 * Perhaps there should be an option to skip the in-core 1298 * copy and hit the disk, as with disklabel(8). 
1299 */ 1300 RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *)); 1301 1302 retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel)); 1303 1304 if (retcode) { 1305 RF_Free(clabel, sizeof(*clabel)); 1306 return retcode; 1307 } 1308 1309 clabel->row = 0; /* Don't allow looking at anything else.*/ 1310 1311 column = clabel->column; 1312 1313 if ((column < 0) || (column >= raidPtr->numCol + 1314 raidPtr->numSpare)) { 1315 RF_Free(clabel, sizeof(*clabel)); 1316 return EINVAL; 1317 } 1318 1319 RF_Free(clabel, sizeof(*clabel)); 1320 1321 clabel = raidget_component_label(raidPtr, column); 1322 1323 return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr)); 1324 1325 #if 0 1326 case RAIDFRAME_SET_COMPONENT_LABEL: 1327 clabel = (RF_ComponentLabel_t *) data; 1328 1329 /* XXX check the label for valid stuff... */ 1330 /* Note that some things *should not* get modified -- 1331 the user should be re-initing the labels instead of 1332 trying to patch things. 1333 */ 1334 1335 raidid = raidPtr->raidid; 1336 #ifdef DEBUG 1337 printf("raid%d: Got component label:\n", raidid); 1338 printf("raid%d: Version: %d\n", raidid, clabel->version); 1339 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number); 1340 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter); 1341 printf("raid%d: Column: %d\n", raidid, clabel->column); 1342 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns); 1343 printf("raid%d: Clean: %d\n", raidid, clabel->clean); 1344 printf("raid%d: Status: %d\n", raidid, clabel->status); 1345 #endif 1346 clabel->row = 0; 1347 column = clabel->column; 1348 1349 if ((column < 0) || (column >= raidPtr->numCol)) { 1350 return(EINVAL); 1351 } 1352 1353 /* XXX this isn't allowed to do anything for now :-) */ 1354 1355 /* XXX and before it is, we need to fill in the rest 1356 of the fields!?!?!?! 
*/ 1357 memcpy(raidget_component_label(raidPtr, column), 1358 clabel, sizeof(*clabel)); 1359 raidflush_component_label(raidPtr, column); 1360 return (0); 1361 #endif 1362 1363 case RAIDFRAME_INIT_LABELS: 1364 clabel = (RF_ComponentLabel_t *) data; 1365 /* 1366 we only want the serial number from 1367 the above. We get all the rest of the information 1368 from the config that was used to create this RAID 1369 set. 1370 */ 1371 1372 raidPtr->serial_number = clabel->serial_number; 1373 1374 for(column=0;column<raidPtr->numCol;column++) { 1375 diskPtr = &raidPtr->Disks[column]; 1376 if (!RF_DEAD_DISK(diskPtr->status)) { 1377 ci_label = raidget_component_label(raidPtr, 1378 column); 1379 /* Zeroing this is important. */ 1380 memset(ci_label, 0, sizeof(*ci_label)); 1381 raid_init_component_label(raidPtr, ci_label); 1382 ci_label->serial_number = 1383 raidPtr->serial_number; 1384 ci_label->row = 0; /* we dont' pretend to support more */ 1385 rf_component_label_set_partitionsize(ci_label, 1386 diskPtr->partitionSize); 1387 ci_label->column = column; 1388 raidflush_component_label(raidPtr, column); 1389 } 1390 /* XXXjld what about the spares? */ 1391 } 1392 1393 return (retcode); 1394 case RAIDFRAME_SET_AUTOCONFIG: 1395 d = rf_set_autoconfig(raidPtr, *(int *) data); 1396 printf("raid%d: New autoconfig value is: %d\n", 1397 raidPtr->raidid, d); 1398 *(int *) data = d; 1399 return (retcode); 1400 1401 case RAIDFRAME_SET_ROOT: 1402 d = rf_set_rootpartition(raidPtr, *(int *) data); 1403 printf("raid%d: New rootpartition value is: %d\n", 1404 raidPtr->raidid, d); 1405 *(int *) data = d; 1406 return (retcode); 1407 1408 /* initialize all parity */ 1409 case RAIDFRAME_REWRITEPARITY: 1410 1411 if (raidPtr->Layout.map->faultsTolerated == 0) { 1412 /* Parity for RAID 0 is trivially correct */ 1413 raidPtr->parity_good = RF_RAID_CLEAN; 1414 return(0); 1415 } 1416 1417 if (raidPtr->parity_rewrite_in_progress == 1) { 1418 /* Re-write is already in progress! 
*/ 1419 return(EINVAL); 1420 } 1421 1422 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread, 1423 rf_RewriteParityThread, 1424 raidPtr,"raid_parity"); 1425 return (retcode); 1426 1427 1428 case RAIDFRAME_ADD_HOT_SPARE: 1429 sparePtr = (RF_SingleComponent_t *) data; 1430 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t)); 1431 retcode = rf_add_hot_spare(raidPtr, &component); 1432 return(retcode); 1433 1434 case RAIDFRAME_REMOVE_HOT_SPARE: 1435 return(retcode); 1436 1437 case RAIDFRAME_DELETE_COMPONENT: 1438 componentPtr = (RF_SingleComponent_t *)data; 1439 memcpy( &component, componentPtr, 1440 sizeof(RF_SingleComponent_t)); 1441 retcode = rf_delete_component(raidPtr, &component); 1442 return(retcode); 1443 1444 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1445 componentPtr = (RF_SingleComponent_t *)data; 1446 memcpy( &component, componentPtr, 1447 sizeof(RF_SingleComponent_t)); 1448 retcode = rf_incorporate_hot_spare(raidPtr, &component); 1449 return(retcode); 1450 1451 case RAIDFRAME_REBUILD_IN_PLACE: 1452 1453 if (raidPtr->Layout.map->faultsTolerated == 0) { 1454 /* Can't do this on a RAID 0!! */ 1455 return(EINVAL); 1456 } 1457 1458 if (raidPtr->recon_in_progress == 1) { 1459 /* a reconstruct is already in progress! */ 1460 return(EINVAL); 1461 } 1462 1463 componentPtr = (RF_SingleComponent_t *) data; 1464 memcpy( &component, componentPtr, 1465 sizeof(RF_SingleComponent_t)); 1466 component.row = 0; /* we don't support any more */ 1467 column = component.column; 1468 1469 if ((column < 0) || (column >= raidPtr->numCol)) { 1470 return(EINVAL); 1471 } 1472 1473 rf_lock_mutex2(raidPtr->mutex); 1474 if ((raidPtr->Disks[column].status == rf_ds_optimal) && 1475 (raidPtr->numFailures > 0)) { 1476 /* XXX 0 above shouldn't be constant!!! */ 1477 /* some component other than this has failed. 1478 Let's not make things worse than they already 1479 are... 
*/ 1480 printf("raid%d: Unable to reconstruct to disk at:\n", 1481 raidPtr->raidid); 1482 printf("raid%d: Col: %d Too many failures.\n", 1483 raidPtr->raidid, column); 1484 rf_unlock_mutex2(raidPtr->mutex); 1485 return (EINVAL); 1486 } 1487 if (raidPtr->Disks[column].status == 1488 rf_ds_reconstructing) { 1489 printf("raid%d: Unable to reconstruct to disk at:\n", 1490 raidPtr->raidid); 1491 printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column); 1492 1493 rf_unlock_mutex2(raidPtr->mutex); 1494 return (EINVAL); 1495 } 1496 if (raidPtr->Disks[column].status == rf_ds_spared) { 1497 rf_unlock_mutex2(raidPtr->mutex); 1498 return (EINVAL); 1499 } 1500 rf_unlock_mutex2(raidPtr->mutex); 1501 1502 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1503 if (rrcopy == NULL) 1504 return(ENOMEM); 1505 1506 rrcopy->raidPtr = (void *) raidPtr; 1507 rrcopy->col = column; 1508 1509 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1510 rf_ReconstructInPlaceThread, 1511 rrcopy,"raid_reconip"); 1512 return(retcode); 1513 1514 case RAIDFRAME_GET_INFO: 1515 if (!raidPtr->valid) 1516 return (ENODEV); 1517 ucfgp = (RF_DeviceConfig_t **) data; 1518 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t), 1519 (RF_DeviceConfig_t *)); 1520 if (d_cfg == NULL) 1521 return (ENOMEM); 1522 d_cfg->rows = 1; /* there is only 1 row now */ 1523 d_cfg->cols = raidPtr->numCol; 1524 d_cfg->ndevs = raidPtr->numCol; 1525 if (d_cfg->ndevs >= RF_MAX_DISKS) { 1526 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1527 return (ENOMEM); 1528 } 1529 d_cfg->nspares = raidPtr->numSpare; 1530 if (d_cfg->nspares >= RF_MAX_DISKS) { 1531 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1532 return (ENOMEM); 1533 } 1534 d_cfg->maxqdepth = raidPtr->maxQueueDepth; 1535 d = 0; 1536 for (j = 0; j < d_cfg->cols; j++) { 1537 d_cfg->devs[d] = raidPtr->Disks[j]; 1538 d++; 1539 } 1540 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) { 1541 d_cfg->spares[i] = raidPtr->Disks[j]; 1542 if 
(d_cfg->spares[i].status == rf_ds_rebuilding_spare) { 1543 /* XXX: raidctl(8) expects to see this as a used spare */ 1544 d_cfg->spares[i].status = rf_ds_used_spare; 1545 } 1546 } 1547 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t)); 1548 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1549 1550 return (retcode); 1551 1552 case RAIDFRAME_CHECK_PARITY: 1553 *(int *) data = raidPtr->parity_good; 1554 return (0); 1555 1556 case RAIDFRAME_PARITYMAP_STATUS: 1557 if (rf_paritymap_ineligible(raidPtr)) 1558 return EINVAL; 1559 rf_paritymap_status(raidPtr->parity_map, 1560 (struct rf_pmstat *)data); 1561 return 0; 1562 1563 case RAIDFRAME_PARITYMAP_SET_PARAMS: 1564 if (rf_paritymap_ineligible(raidPtr)) 1565 return EINVAL; 1566 if (raidPtr->parity_map == NULL) 1567 return ENOENT; /* ??? */ 1568 if (0 != rf_paritymap_set_params(raidPtr->parity_map, 1569 (struct rf_pmparams *)data, 1)) 1570 return EINVAL; 1571 return 0; 1572 1573 case RAIDFRAME_PARITYMAP_GET_DISABLE: 1574 if (rf_paritymap_ineligible(raidPtr)) 1575 return EINVAL; 1576 *(int *) data = rf_paritymap_get_disable(raidPtr); 1577 return 0; 1578 1579 case RAIDFRAME_PARITYMAP_SET_DISABLE: 1580 if (rf_paritymap_ineligible(raidPtr)) 1581 return EINVAL; 1582 rf_paritymap_set_disable(raidPtr, *(int *)data); 1583 /* XXX should errors be passed up? */ 1584 return 0; 1585 1586 case RAIDFRAME_RESET_ACCTOTALS: 1587 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals)); 1588 return (0); 1589 1590 case RAIDFRAME_GET_ACCTOTALS: 1591 totals = (RF_AccTotals_t *) data; 1592 *totals = raidPtr->acc_totals; 1593 return (0); 1594 1595 case RAIDFRAME_KEEP_ACCTOTALS: 1596 raidPtr->keep_acc_totals = *(int *)data; 1597 return (0); 1598 1599 case RAIDFRAME_GET_SIZE: 1600 *(int *) data = raidPtr->totalSectors; 1601 return (0); 1602 1603 /* fail a disk & optionally start reconstruction */ 1604 case RAIDFRAME_FAIL_DISK: 1605 1606 if (raidPtr->Layout.map->faultsTolerated == 0) { 1607 /* Can't do this on a RAID 0!! 
*/ 1608 return(EINVAL); 1609 } 1610 1611 rr = (struct rf_recon_req *) data; 1612 rr->row = 0; 1613 if (rr->col < 0 || rr->col >= raidPtr->numCol) 1614 return (EINVAL); 1615 1616 1617 rf_lock_mutex2(raidPtr->mutex); 1618 if (raidPtr->status == rf_rs_reconstructing) { 1619 /* you can't fail a disk while we're reconstructing! */ 1620 /* XXX wrong for RAID6 */ 1621 rf_unlock_mutex2(raidPtr->mutex); 1622 return (EINVAL); 1623 } 1624 if ((raidPtr->Disks[rr->col].status == 1625 rf_ds_optimal) && (raidPtr->numFailures > 0)) { 1626 /* some other component has failed. Let's not make 1627 things worse. XXX wrong for RAID6 */ 1628 rf_unlock_mutex2(raidPtr->mutex); 1629 return (EINVAL); 1630 } 1631 if (raidPtr->Disks[rr->col].status == rf_ds_spared) { 1632 /* Can't fail a spared disk! */ 1633 rf_unlock_mutex2(raidPtr->mutex); 1634 return (EINVAL); 1635 } 1636 rf_unlock_mutex2(raidPtr->mutex); 1637 1638 /* make a copy of the recon request so that we don't rely on 1639 * the user's buffer */ 1640 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1641 if (rrcopy == NULL) 1642 return(ENOMEM); 1643 memcpy(rrcopy, rr, sizeof(*rr)); 1644 rrcopy->raidPtr = (void *) raidPtr; 1645 1646 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1647 rf_ReconThread, 1648 rrcopy,"raid_recon"); 1649 return (0); 1650 1651 /* invoke a copyback operation after recon on whatever disk 1652 * needs it, if any */ 1653 case RAIDFRAME_COPYBACK: 1654 1655 if (raidPtr->Layout.map->faultsTolerated == 0) { 1656 /* This makes no sense on a RAID 0!! */ 1657 return(EINVAL); 1658 } 1659 1660 if (raidPtr->copyback_in_progress == 1) { 1661 /* Copyback is already in progress! 
*/ 1662 return(EINVAL); 1663 } 1664 1665 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread, 1666 rf_CopybackThread, 1667 raidPtr,"raid_copyback"); 1668 return (retcode); 1669 1670 /* return the percentage completion of reconstruction */ 1671 case RAIDFRAME_CHECK_RECON_STATUS: 1672 if (raidPtr->Layout.map->faultsTolerated == 0) { 1673 /* This makes no sense on a RAID 0, so tell the 1674 user it's done. */ 1675 *(int *) data = 100; 1676 return(0); 1677 } 1678 if (raidPtr->status != rf_rs_reconstructing) 1679 *(int *) data = 100; 1680 else { 1681 if (raidPtr->reconControl->numRUsTotal > 0) { 1682 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal); 1683 } else { 1684 *(int *) data = 0; 1685 } 1686 } 1687 return (0); 1688 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1689 progressInfoPtr = (RF_ProgressInfo_t **) data; 1690 if (raidPtr->status != rf_rs_reconstructing) { 1691 progressInfo.remaining = 0; 1692 progressInfo.completed = 100; 1693 progressInfo.total = 100; 1694 } else { 1695 progressInfo.total = 1696 raidPtr->reconControl->numRUsTotal; 1697 progressInfo.completed = 1698 raidPtr->reconControl->numRUsComplete; 1699 progressInfo.remaining = progressInfo.total - 1700 progressInfo.completed; 1701 } 1702 retcode = copyout(&progressInfo, *progressInfoPtr, 1703 sizeof(RF_ProgressInfo_t)); 1704 return (retcode); 1705 1706 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1707 if (raidPtr->Layout.map->faultsTolerated == 0) { 1708 /* This makes no sense on a RAID 0, so tell the 1709 user it's done. 
*/ 1710 *(int *) data = 100; 1711 return(0); 1712 } 1713 if (raidPtr->parity_rewrite_in_progress == 1) { 1714 *(int *) data = 100 * 1715 raidPtr->parity_rewrite_stripes_done / 1716 raidPtr->Layout.numStripe; 1717 } else { 1718 *(int *) data = 100; 1719 } 1720 return (0); 1721 1722 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1723 progressInfoPtr = (RF_ProgressInfo_t **) data; 1724 if (raidPtr->parity_rewrite_in_progress == 1) { 1725 progressInfo.total = raidPtr->Layout.numStripe; 1726 progressInfo.completed = 1727 raidPtr->parity_rewrite_stripes_done; 1728 progressInfo.remaining = progressInfo.total - 1729 progressInfo.completed; 1730 } else { 1731 progressInfo.remaining = 0; 1732 progressInfo.completed = 100; 1733 progressInfo.total = 100; 1734 } 1735 retcode = copyout(&progressInfo, *progressInfoPtr, 1736 sizeof(RF_ProgressInfo_t)); 1737 return (retcode); 1738 1739 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1740 if (raidPtr->Layout.map->faultsTolerated == 0) { 1741 /* This makes no sense on a RAID 0 */ 1742 *(int *) data = 100; 1743 return(0); 1744 } 1745 if (raidPtr->copyback_in_progress == 1) { 1746 *(int *) data = 100 * raidPtr->copyback_stripes_done / 1747 raidPtr->Layout.numStripe; 1748 } else { 1749 *(int *) data = 100; 1750 } 1751 return (0); 1752 1753 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1754 progressInfoPtr = (RF_ProgressInfo_t **) data; 1755 if (raidPtr->copyback_in_progress == 1) { 1756 progressInfo.total = raidPtr->Layout.numStripe; 1757 progressInfo.completed = 1758 raidPtr->copyback_stripes_done; 1759 progressInfo.remaining = progressInfo.total - 1760 progressInfo.completed; 1761 } else { 1762 progressInfo.remaining = 0; 1763 progressInfo.completed = 100; 1764 progressInfo.total = 100; 1765 } 1766 retcode = copyout(&progressInfo, *progressInfoPtr, 1767 sizeof(RF_ProgressInfo_t)); 1768 return (retcode); 1769 1770 /* the sparetable daemon calls this to wait for the kernel to 1771 * need a spare table. 
this ioctl does not return until a 1772 * spare table is needed. XXX -- calling mpsleep here in the 1773 * ioctl code is almost certainly wrong and evil. -- XXX XXX 1774 * -- I should either compute the spare table in the kernel, 1775 * or have a different -- XXX XXX -- interface (a different 1776 * character device) for delivering the table -- XXX */ 1777 #if 0 1778 case RAIDFRAME_SPARET_WAIT: 1779 rf_lock_mutex2(rf_sparet_wait_mutex); 1780 while (!rf_sparet_wait_queue) 1781 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex); 1782 waitreq = rf_sparet_wait_queue; 1783 rf_sparet_wait_queue = rf_sparet_wait_queue->next; 1784 rf_unlock_mutex2(rf_sparet_wait_mutex); 1785 1786 /* structure assignment */ 1787 *((RF_SparetWait_t *) data) = *waitreq; 1788 1789 RF_Free(waitreq, sizeof(*waitreq)); 1790 return (0); 1791 1792 /* wakes up a process waiting on SPARET_WAIT and puts an error 1793 * code in it that will cause the dameon to exit */ 1794 case RAIDFRAME_ABORT_SPARET_WAIT: 1795 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1796 waitreq->fcol = -1; 1797 rf_lock_mutex2(rf_sparet_wait_mutex); 1798 waitreq->next = rf_sparet_wait_queue; 1799 rf_sparet_wait_queue = waitreq; 1800 rf_broadcast_conf2(rf_sparet_wait_cv); 1801 rf_unlock_mutex2(rf_sparet_wait_mutex); 1802 return (0); 1803 1804 /* used by the spare table daemon to deliver a spare table 1805 * into the kernel */ 1806 case RAIDFRAME_SEND_SPARET: 1807 1808 /* install the spare table */ 1809 retcode = rf_SetSpareTable(raidPtr, *(void **) data); 1810 1811 /* respond to the requestor. 
the return status of the spare 1812 * table installation is passed in the "fcol" field */ 1813 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1814 waitreq->fcol = retcode; 1815 rf_lock_mutex2(rf_sparet_wait_mutex); 1816 waitreq->next = rf_sparet_resp_queue; 1817 rf_sparet_resp_queue = waitreq; 1818 rf_broadcast_cond2(rf_sparet_resp_cv); 1819 rf_unlock_mutex2(rf_sparet_wait_mutex); 1820 1821 return (retcode); 1822 #endif 1823 1824 default: 1825 break; /* fall through to the os-specific code below */ 1826 1827 } 1828 1829 if (!raidPtr->valid) 1830 return (EINVAL); 1831 1832 /* 1833 * Add support for "regular" device ioctls here. 1834 */ 1835 1836 error = disk_ioctl(&rs->sc_dkdev, dev, cmd, data, flag, l); 1837 if (error != EPASSTHROUGH) 1838 return (error); 1839 1840 switch (cmd) { 1841 case DIOCWDINFO: 1842 case DIOCSDINFO: 1843 #ifdef __HAVE_OLD_DISKLABEL 1844 case ODIOCWDINFO: 1845 case ODIOCSDINFO: 1846 #endif 1847 { 1848 struct disklabel *lp; 1849 #ifdef __HAVE_OLD_DISKLABEL 1850 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1851 memset(&newlabel, 0, sizeof newlabel); 1852 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1853 lp = &newlabel; 1854 } else 1855 #endif 1856 lp = (struct disklabel *)data; 1857 1858 if ((error = raidlock(rs)) != 0) 1859 return (error); 1860 1861 rs->sc_flags |= RAIDF_LABELLING; 1862 1863 error = setdisklabel(rs->sc_dkdev.dk_label, 1864 lp, 0, rs->sc_dkdev.dk_cpulabel); 1865 if (error == 0) { 1866 if (cmd == DIOCWDINFO 1867 #ifdef __HAVE_OLD_DISKLABEL 1868 || cmd == ODIOCWDINFO 1869 #endif 1870 ) 1871 error = writedisklabel(RAIDLABELDEV(dev), 1872 raidstrategy, rs->sc_dkdev.dk_label, 1873 rs->sc_dkdev.dk_cpulabel); 1874 } 1875 rs->sc_flags &= ~RAIDF_LABELLING; 1876 1877 raidunlock(rs); 1878 1879 if (error) 1880 return (error); 1881 break; 1882 } 1883 1884 case DIOCWLABEL: 1885 if (*(int *) data != 0) 1886 rs->sc_flags |= RAIDF_WLABEL; 1887 else 1888 rs->sc_flags &= ~RAIDF_WLABEL; 1889 break; 1890 1891 case 
DIOCGDEFLABEL: 1892 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data); 1893 break; 1894 1895 #ifdef __HAVE_OLD_DISKLABEL 1896 case ODIOCGDEFLABEL: 1897 raidgetdefaultlabel(raidPtr, rs, &newlabel); 1898 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1899 return ENOTTY; 1900 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1901 break; 1902 #endif 1903 1904 case DIOCCACHESYNC: 1905 return rf_sync_component_caches(raidPtr); 1906 1907 case DIOCGSTRATEGY: 1908 { 1909 struct disk_strategy *dks = (void *)data; 1910 1911 s = splbio(); 1912 strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue), 1913 sizeof(dks->dks_name)); 1914 splx(s); 1915 dks->dks_paramlen = 0; 1916 1917 return 0; 1918 } 1919 1920 case DIOCSSTRATEGY: 1921 { 1922 struct disk_strategy *dks = (void *)data; 1923 struct bufq_state *new; 1924 struct bufq_state *old; 1925 1926 if (dks->dks_param != NULL) { 1927 return EINVAL; 1928 } 1929 dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */ 1930 error = bufq_alloc(&new, dks->dks_name, 1931 BUFQ_EXACT|BUFQ_SORT_RAWBLOCK); 1932 if (error) { 1933 return error; 1934 } 1935 s = splbio(); 1936 old = rs->buf_queue; 1937 bufq_move(new, old); 1938 rs->buf_queue = new; 1939 splx(s); 1940 bufq_free(old); 1941 1942 return 0; 1943 } 1944 1945 default: 1946 retcode = ENOTTY; 1947 } 1948 return (retcode); 1949 1950 } 1951 1952 1953 /* raidinit -- complete the rest of the initialization for the 1954 RAIDframe device. */ 1955 1956 1957 static void 1958 raidinit(struct raid_softc *rs) 1959 { 1960 cfdata_t cf; 1961 int unit; 1962 RF_Raid_t *raidPtr = &rs->sc_r; 1963 1964 unit = raidPtr->raidid; 1965 1966 1967 /* XXX should check return code first... */ 1968 rs->sc_flags |= RAIDF_INITED; 1969 1970 /* XXX doesn't check bounds. 
 */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	rs->sc_dev = config_attach_pseudo(cf);

	if (rs->sc_dev == NULL) {
		/* attach failed: undo the INITED flag and free the cfdata */
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		rs->sc_flags &= ~RAIDF_INITED;
		free(cf, M_RAIDFRAME);
		return;
	}

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	rf_set_geometry(rs, raidPtr);

	dkwedge_discover(&rs->sc_dkdev);

}
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device were requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO */
/*
 * rf_GetSpareTableFromDaemon: queue `req` for the userland sparetable
 * daemon, then block until a response appears on rf_sparet_resp_queue.
 * Returns the "fcol" status from the response and frees the response
 * entry (which is a different allocation than the request passed in).
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
#endif

/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	rs = raidPtr->softc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* drop the lock around the label update, then re-take it */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
		    RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/*
	 * NOTE: raidPtr->mutex is held at the top of each iteration and
	 * released while the request is prepared; each error path re-takes
	 * it before `continue` so the loop condition is always evaluated
	 * under the lock.
	 */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			(int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb = 1 iff the byte count is not sector-aligned */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		/* NOTE(review): "1 ||" forces this debug path on
		 * unconditionally -- looks like a debugging leftover. */
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				(int) raid_addr, (int) sum, (int) num_blocks,
				(int) pb, (int) bp->b_resid));
		}
		/* reject requests past the end of the set (the `sum <`
		 * comparisons also catch unsigned wraparound) */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		if (bp->b_bcount & raidPtr->sectorMask) {
			/* not a multiple of the sector size: reject */
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
		    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
		    do_async, raid_addr, num_blocks,
		    bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}




/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */

int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ?
	B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* complete the NOP immediately via the normal I/O
		 * completion path */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* set up the buf for the component device; completion
		 * comes back through KernelWakeupFunc() */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			req->type, queue->raidPtr->raidid,
			queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.

   Runs at I/O completion time: accounts for the transfer, fails the
   component on error (only if the set can still tolerate the failure),
   then moves the request to raidPtr->iodone and signals the raidio
   thread.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	/* the request was stashed in b_private by InitBP() */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}


/*
 * initialize a buf structure for doing an I/O in the kernel.
 *
 * cbFunc/cbArg become b_iodone/b_private, so the request (cbArg) is
 * recoverable in the completion callback.
 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
    RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
    void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
    struct proc *b_proc)
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	/* convert the starting sector into a DEV_BSIZE block number */
	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}

static void
raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
		    struct disklabel *lp)
{
	memset(lp, 0, sizeof(*lp));

	/* fabricate a label...
	 */
	/* disklabel fields are 32-bit; clamp very large sets */
	if (raidPtr->totalSectors > UINT32_MAX)
		lp->d_secperunit = UINT32_MAX;
	else
		lp->d_secperunit = raidPtr->totalSectors;
	lp->d_secsize = raidPtr->bytesPerSector;
	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	lp->d_ntracks = 4 * raidPtr->numCol;
	lp->d_ncylinders = raidPtr->totalSectors /
		(lp->d_nsectors * lp->d_ntracks);
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
	lp->d_type = DKTYPE_RAID;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	lp->d_partitions[RAW_PART].p_offset = 0;
	lp->d_partitions[RAW_PART].p_size = lp->d_secperunit;
	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	/* NOTE(review): checksums rs->sc_dkdev.dk_label rather than lp.
	   These are the same object when called from raidgetdisklabel();
	   verify for any caller that passes a different lp. */
	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);

}
/*
 * Read the disklabel from the raid device. If one is not present, fake one
 * up.
 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const char *errstring;
	struct disklabel *lp;
	struct cpu_disklabel *clp;
	RF_Raid_t *raidPtr;

	if ((rs = raidget(unit)) == NULL)
		return;

	lp = rs->sc_dkdev.dk_label;
	clp = rs->sc_dkdev.dk_cpulabel;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = &rs->sc_r;

	/* start from a fabricated label; readdisklabel() below
	   overwrites it if an on-disk one is found */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		/* d_secperunit saturates at UINT32_MAX above, so only
		   an upper-bound check is possible for huge sets */
		if (lp->d_secperunit < UINT32_MAX ?
			lp->d_secperunit != rs->sc_size :
			lp->d_secperunit > rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%ju) != "
			    "the size of raid (%ju)\n", unit, rs->sc_xname,
			    (uintmax_t)lp->d_secperunit,
			    (uintmax_t)rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ju)\n",
				    unit, rs->sc_xname, 'a' + i,
				    (uintmax_t)rs->sc_size);
		}
	}

}
/*
 * Take care of things one might want to take care of in the event
 * that a disklabel isn't present.
 */
static void
raidmakedisklabel(struct raid_softc *rs)
{
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	db1_printf(("Making a label..\n"));

	/*
	 * For historical reasons, if there's no disklabel present
	 * the raw partition must be marked FS_BSDFFS.
	 */

	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;

	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));

	lp->d_checksum = dkcksum(lp);
}
/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 * (Hmm...
where have we seen this warning before :-> GO ) 2473 */ 2474 static int 2475 raidlock(struct raid_softc *rs) 2476 { 2477 int error; 2478 2479 while ((rs->sc_flags & RAIDF_LOCKED) != 0) { 2480 rs->sc_flags |= RAIDF_WANTED; 2481 if ((error = 2482 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0) 2483 return (error); 2484 } 2485 rs->sc_flags |= RAIDF_LOCKED; 2486 return (0); 2487 } 2488 /* 2489 * Unlock and wake up any waiters. 2490 */ 2491 static void 2492 raidunlock(struct raid_softc *rs) 2493 { 2494 2495 rs->sc_flags &= ~RAIDF_LOCKED; 2496 if ((rs->sc_flags & RAIDF_WANTED) != 0) { 2497 rs->sc_flags &= ~RAIDF_WANTED; 2498 wakeup(rs); 2499 } 2500 } 2501 2502 2503 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ 2504 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ 2505 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE 2506 2507 static daddr_t 2508 rf_component_info_offset(void) 2509 { 2510 2511 return RF_COMPONENT_INFO_OFFSET; 2512 } 2513 2514 static daddr_t 2515 rf_component_info_size(unsigned secsize) 2516 { 2517 daddr_t info_size; 2518 2519 KASSERT(secsize); 2520 if (secsize > RF_COMPONENT_INFO_SIZE) 2521 info_size = secsize; 2522 else 2523 info_size = RF_COMPONENT_INFO_SIZE; 2524 2525 return info_size; 2526 } 2527 2528 static daddr_t 2529 rf_parity_map_offset(RF_Raid_t *raidPtr) 2530 { 2531 daddr_t map_offset; 2532 2533 KASSERT(raidPtr->bytesPerSector); 2534 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE) 2535 map_offset = raidPtr->bytesPerSector; 2536 else 2537 map_offset = RF_COMPONENT_INFO_SIZE; 2538 map_offset += rf_component_info_offset(); 2539 2540 return map_offset; 2541 } 2542 2543 static daddr_t 2544 rf_parity_map_size(RF_Raid_t *raidPtr) 2545 { 2546 daddr_t map_size; 2547 2548 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE) 2549 map_size = raidPtr->bytesPerSector; 2550 else 2551 map_size = RF_PARITY_MAP_SIZE; 2552 2553 return map_size; 2554 } 2555 2556 int 2557 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col) 2558 { 2559 RF_ComponentLabel_t 
*clabel; 2560 2561 clabel = raidget_component_label(raidPtr, col); 2562 clabel->clean = RF_RAID_CLEAN; 2563 raidflush_component_label(raidPtr, col); 2564 return(0); 2565 } 2566 2567 2568 int 2569 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col) 2570 { 2571 RF_ComponentLabel_t *clabel; 2572 2573 clabel = raidget_component_label(raidPtr, col); 2574 clabel->clean = RF_RAID_DIRTY; 2575 raidflush_component_label(raidPtr, col); 2576 return(0); 2577 } 2578 2579 int 2580 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2581 { 2582 KASSERT(raidPtr->bytesPerSector); 2583 return raidread_component_label(raidPtr->bytesPerSector, 2584 raidPtr->Disks[col].dev, 2585 raidPtr->raid_cinfo[col].ci_vp, 2586 &raidPtr->raid_cinfo[col].ci_label); 2587 } 2588 2589 RF_ComponentLabel_t * 2590 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2591 { 2592 return &raidPtr->raid_cinfo[col].ci_label; 2593 } 2594 2595 int 2596 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2597 { 2598 RF_ComponentLabel_t *label; 2599 2600 label = &raidPtr->raid_cinfo[col].ci_label; 2601 label->mod_counter = raidPtr->mod_counter; 2602 #ifndef RF_NO_PARITY_MAP 2603 label->parity_map_modcount = label->mod_counter; 2604 #endif 2605 return raidwrite_component_label(raidPtr->bytesPerSector, 2606 raidPtr->Disks[col].dev, 2607 raidPtr->raid_cinfo[col].ci_vp, label); 2608 } 2609 2610 2611 static int 2612 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp, 2613 RF_ComponentLabel_t *clabel) 2614 { 2615 return raidread_component_area(dev, b_vp, clabel, 2616 sizeof(RF_ComponentLabel_t), 2617 rf_component_info_offset(), 2618 rf_component_info_size(secsize)); 2619 } 2620 2621 /* ARGSUSED */ 2622 static int 2623 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data, 2624 size_t msize, daddr_t offset, daddr_t dsize) 2625 { 2626 struct buf *bp; 2627 const struct bdevsw *bdev; 2628 int error; 2629 2630 /* XXX should probably ensure that we don't try to do 
this if 2631 someone has changed rf_protected_sectors. */ 2632 2633 if (b_vp == NULL) { 2634 /* For whatever reason, this component is not valid. 2635 Don't try to read a component label from it. */ 2636 return(EINVAL); 2637 } 2638 2639 /* get a block of the appropriate size... */ 2640 bp = geteblk((int)dsize); 2641 bp->b_dev = dev; 2642 2643 /* get our ducks in a row for the read */ 2644 bp->b_blkno = offset / DEV_BSIZE; 2645 bp->b_bcount = dsize; 2646 bp->b_flags |= B_READ; 2647 bp->b_resid = dsize; 2648 2649 bdev = bdevsw_lookup(bp->b_dev); 2650 if (bdev == NULL) 2651 return (ENXIO); 2652 (*bdev->d_strategy)(bp); 2653 2654 error = biowait(bp); 2655 2656 if (!error) { 2657 memcpy(data, bp->b_data, msize); 2658 } 2659 2660 brelse(bp, 0); 2661 return(error); 2662 } 2663 2664 2665 static int 2666 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp, 2667 RF_ComponentLabel_t *clabel) 2668 { 2669 return raidwrite_component_area(dev, b_vp, clabel, 2670 sizeof(RF_ComponentLabel_t), 2671 rf_component_info_offset(), 2672 rf_component_info_size(secsize), 0); 2673 } 2674 2675 /* ARGSUSED */ 2676 static int 2677 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data, 2678 size_t msize, daddr_t offset, daddr_t dsize, int asyncp) 2679 { 2680 struct buf *bp; 2681 const struct bdevsw *bdev; 2682 int error; 2683 2684 /* get a block of the appropriate size... */ 2685 bp = geteblk((int)dsize); 2686 bp->b_dev = dev; 2687 2688 /* get our ducks in a row for the write */ 2689 bp->b_blkno = offset / DEV_BSIZE; 2690 bp->b_bcount = dsize; 2691 bp->b_flags |= B_WRITE | (asyncp ? 
B_ASYNC : 0); 2692 bp->b_resid = dsize; 2693 2694 memset(bp->b_data, 0, dsize); 2695 memcpy(bp->b_data, data, msize); 2696 2697 bdev = bdevsw_lookup(bp->b_dev); 2698 if (bdev == NULL) 2699 return (ENXIO); 2700 (*bdev->d_strategy)(bp); 2701 if (asyncp) 2702 return 0; 2703 error = biowait(bp); 2704 brelse(bp, 0); 2705 if (error) { 2706 #if 1 2707 printf("Failed to write RAID component info!\n"); 2708 #endif 2709 } 2710 2711 return(error); 2712 } 2713 2714 void 2715 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map) 2716 { 2717 int c; 2718 2719 for (c = 0; c < raidPtr->numCol; c++) { 2720 /* Skip dead disks. */ 2721 if (RF_DEAD_DISK(raidPtr->Disks[c].status)) 2722 continue; 2723 /* XXXjld: what if an error occurs here? */ 2724 raidwrite_component_area(raidPtr->Disks[c].dev, 2725 raidPtr->raid_cinfo[c].ci_vp, map, 2726 RF_PARITYMAP_NBYTE, 2727 rf_parity_map_offset(raidPtr), 2728 rf_parity_map_size(raidPtr), 0); 2729 } 2730 } 2731 2732 void 2733 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map) 2734 { 2735 struct rf_paritymap_ondisk tmp; 2736 int c,first; 2737 2738 first=1; 2739 for (c = 0; c < raidPtr->numCol; c++) { 2740 /* Skip dead disks. 
 */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		/* read errors are ignored here; the return value of
		   raidread_component_area() is not checked */
		raidread_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr));
		if (first) {
			memcpy(map, &tmp, sizeof(*map));
			first = 0;
		} else {
			rf_paritymap_merge(map, &tmp);
		}
	}
}

/* Bump the mod counter and mark every live component label dirty. */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	/* spares occupy columns numCol..numCol+numSpare-1 */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}

/*
 * Refresh the component labels of the whole set: bump mod_counter,
 * flush labels on every optimal column and every in-use spare, and
 * on a final update mark them clean if parity is known good.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare replaced */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}

/*
 * Close one component's vnode.  Auto-configured components are closed
 * with VOP_CLOSE directly; everything else goes through vn_close().
 */
void
rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
{

	if (vp != NULL) {
		if (auto_configured == 1) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

		} else {
			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
		}
	}
}


/* Close and forget the vnodes of all components and spares. */
void
rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
{
	int r,c;
	struct vnode *vp;
	int acd;


	/* We take this opportunity to close the vnodes like we should..
	 */

	for (c = 0; c < raidPtr->numCol; c++) {
		vp = raidPtr->raid_cinfo[c].ci_vp;
		acd = raidPtr->Disks[c].auto_configured;
		rf_close_component(raidPtr, vp, acd);
		raidPtr->raid_cinfo[c].ci_vp = NULL;
		raidPtr->Disks[c].auto_configured = 0;
	}

	/* spares live past the last data column */
	for (r = 0; r < raidPtr->numSpare; r++) {
		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
		rf_close_component(raidPtr, vp, acd);
		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
	}
}


/* Kernel thread body: fail a disk and (optionally) reconstruct to spare. */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	/* the request is ours to free */
	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}

/* Kernel thread body: rewrite all parity; set parity_good on success. */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all...
	 */
	kthread_exit(0);	/* does not return */
}


/* Kernel thread body: copy reconstructed data back to its home disk. */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}


/* Kernel thread body: reconstruct a component in place, free the request. */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}

/*
 * Try to read a component label from (dev, vp).  If it looks valid,
 * prepend a new RF_AutoConfig_t for it to ac_list and return the list;
 * otherwise close the vnode and return the list unchanged.  On memory
 * exhaustion the whole list is freed and NULL is returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL;		/* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it.
			 */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: drop the label and close/release the vnode */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}

/*
 * Probe every disk-class device in the system for RAIDframe component
 * labels; returns a list of candidate components for autoconfiguration.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's...
		 */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			/* wedges carry their type in dkw_ptype rather
			   than in a disklabel partition entry */
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			/* rf_get_component() takes ownership of vp */
			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
			    label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}


/* Sanity-check a component label against the component's sector count. */
int
rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
{

	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
	    ((clabel->clean == RF_RAID_CLEAN) ||
	     (clabel->clean == RF_RAID_DIRTY)) &&
	    clabel->row >=0 &&
	    clabel->column >= 0 &&
	    clabel->num_rows > 0 &&
	    clabel->num_columns > 0 &&
	    clabel->row < clabel->num_rows &&
	    clabel->column < clabel->num_columns &&
	    clabel->blockSize > 0 &&
	    /*
	     * numBlocksHi may contain garbage, but it is ok since
	     * the type is unsigned.  If it is really garbage,
	     * rf_fix_old_label_size() will fix it.
	     */
	    rf_component_label_numblocks(clabel) > 0) {
		/*
		 * label looks reasonable enough...
		 * let's make sure it has no old garbage.
		 */
		if (numsecs)
			rf_fix_old_label_size(clabel, numsecs);
		return(1);
	}
	return(0);
}


/*
 * For reasons yet unknown, some old component labels have garbage in
 * the newer numBlocksHi region, and this causes lossage.  Since those
 * disks will also have numsecs set to less than 32 bits of sectors,
 * we can determine when this corruption has occurred, and fix it.
 *
 * The exact same problem, with the same unknown reason, happens to
 * the partitionSizeHi member as well.
 */
static void
rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
{

	/* a component smaller than 2^32 sectors cannot legitimately
	   have either Hi field set */
	if (numsecs < ((uint64_t)1 << 32)) {
		if (clabel->numBlocksHi) {
			printf("WARNING: total sectors < 32 bits, yet "
			       "numBlocksHi set\n"
			       "WARNING: resetting numBlocksHi to zero.\n");
			clabel->numBlocksHi = 0;
		}

		if (clabel->partitionSizeHi) {
			printf("WARNING: total sectors < 32 bits, yet "
			       "partitionSizeHi set\n"
			       "WARNING: resetting partitionSizeHi to zero.\n");
			clabel->partitionSizeHi = 0;
		}
	}
}


#ifdef DEBUG
/* Dump a component label to the console (debug builds only). */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf(" Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf(" Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf(" Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif

}
#endif

/*
 * Partition the autoconfig candidate list into config sets: each set
 * collects components whose labels match (see rf_does_it_fit()).
 */
RF_ConfigSet_t *
rf_create_auto_sets(RF_AutoConfig_t *ac_list)
{
	RF_AutoConfig_t *ac;
	RF_ConfigSet_t *config_sets;
	RF_ConfigSet_t *cset;
	RF_AutoConfig_t *ac_next;


	config_sets = NULL;

	/* Go through the AutoConfig list, and figure out which components
	   belong to what sets. */
	ac = ac_list;
	while(ac!=NULL) {
		/* we're going to putz with ac->next, so save it here
		   for use at the end of the loop */
		ac_next = ac->next;

		if (config_sets == NULL) {
			/* will need at least this one... */
			config_sets = (RF_ConfigSet_t *)
				malloc(sizeof(RF_ConfigSet_t),
				       M_RAIDFRAME, M_NOWAIT);
			if (config_sets == NULL) {
				panic("rf_create_auto_sets: No memory!");
			}
			/* this one is easy :) */
			config_sets->ac = ac;
			config_sets->next = NULL;
			config_sets->rootable = 0;
			ac->next = NULL;
		} else {
			/* which set does this component fit into? */
			cset = config_sets;
			while(cset!=NULL) {
				if (rf_does_it_fit(cset, ac)) {
					/* looks like it matches... */
					ac->next = cset->ac;
					cset->ac = ac;
					break;
				}
				cset = cset->next;
			}
			if (cset==NULL) {
				/* didn't find a match above... new set..*/
				cset = (RF_ConfigSet_t *)
					malloc(sizeof(RF_ConfigSet_t),
					       M_RAIDFRAME, M_NOWAIT);
				if (cset == NULL) {
					panic("rf_create_auto_sets: No memory!");
				}
				cset->ac = ac;
				ac->next = NULL;
				cset->next = config_sets;
				cset->rootable = 0;
				config_sets = cset;
			}
		}
		ac = ac_next;
	}


	return(config_sets);
}

/* Does component `ac' belong to config set `cset'?  Returns 1 if so. */
static int
rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
{
	RF_ComponentLabel_t *clabel1, *clabel2;

	/* If this one matches the *first* one in the set, that's good
	   enough, since the other members of the set would have been
	   through here too... */
	/* note that we are not checking partitionSize here..

	   Note that we are also not checking the mod_counters here.
	   If everything else matches except the mod_counter, that's
	   good enough for this test.  We will deal with the mod_counters
	   a little later in the autoconfiguration process.

	    (clabel1->mod_counter == clabel2->mod_counter) &&

	   The reason we don't check for this is that failed disks
	   will have lower modification counts.  If those disks are
	   not added to the set they used to belong to, then they will
	   form their own set, which may result in 2 different sets,
	   for example, competing to be configured at raid0, and
	   perhaps competing to be the root filesystem set.  If the
	   wrong ones get configured, or both attempt to become /,
	   weird behaviour and or serious lossage will occur.  Thus we
	   need to bring them into the fold here, and kick them out at
	   a later point.
3458 3459 */ 3460 3461 clabel1 = cset->ac->clabel; 3462 clabel2 = ac->clabel; 3463 if ((clabel1->version == clabel2->version) && 3464 (clabel1->serial_number == clabel2->serial_number) && 3465 (clabel1->num_rows == clabel2->num_rows) && 3466 (clabel1->num_columns == clabel2->num_columns) && 3467 (clabel1->sectPerSU == clabel2->sectPerSU) && 3468 (clabel1->SUsPerPU == clabel2->SUsPerPU) && 3469 (clabel1->SUsPerRU == clabel2->SUsPerRU) && 3470 (clabel1->parityConfig == clabel2->parityConfig) && 3471 (clabel1->maxOutstanding == clabel2->maxOutstanding) && 3472 (clabel1->blockSize == clabel2->blockSize) && 3473 rf_component_label_numblocks(clabel1) == 3474 rf_component_label_numblocks(clabel2) && 3475 (clabel1->autoconfigure == clabel2->autoconfigure) && 3476 (clabel1->root_partition == clabel2->root_partition) && 3477 (clabel1->last_unit == clabel2->last_unit) && 3478 (clabel1->config_order == clabel2->config_order)) { 3479 /* if it get's here, it almost *has* to be a match */ 3480 } else { 3481 /* it's not consistent with somebody in the set.. 3482 punt */ 3483 return(0); 3484 } 3485 /* all was fine.. it must fit... */ 3486 return(1); 3487 } 3488 3489 int 3490 rf_have_enough_components(RF_ConfigSet_t *cset) 3491 { 3492 RF_AutoConfig_t *ac; 3493 RF_AutoConfig_t *auto_config; 3494 RF_ComponentLabel_t *clabel; 3495 int c; 3496 int num_cols; 3497 int num_missing; 3498 int mod_counter; 3499 int mod_counter_found; 3500 int even_pair_failed; 3501 char parity_type; 3502 3503 3504 /* check to see that we have enough 'live' components 3505 of this set. If so, we can configure it if necessary */ 3506 3507 num_cols = cset->ac->clabel->num_columns; 3508 parity_type = cset->ac->clabel->parityConfig; 3509 3510 /* XXX Check for duplicate components!?!?!? */ 3511 3512 /* Determine what the mod_counter is supposed to be for this set. 
*/ 3513 3514 mod_counter_found = 0; 3515 mod_counter = 0; 3516 ac = cset->ac; 3517 while(ac!=NULL) { 3518 if (mod_counter_found==0) { 3519 mod_counter = ac->clabel->mod_counter; 3520 mod_counter_found = 1; 3521 } else { 3522 if (ac->clabel->mod_counter > mod_counter) { 3523 mod_counter = ac->clabel->mod_counter; 3524 } 3525 } 3526 ac = ac->next; 3527 } 3528 3529 num_missing = 0; 3530 auto_config = cset->ac; 3531 3532 even_pair_failed = 0; 3533 for(c=0; c<num_cols; c++) { 3534 ac = auto_config; 3535 while(ac!=NULL) { 3536 if ((ac->clabel->column == c) && 3537 (ac->clabel->mod_counter == mod_counter)) { 3538 /* it's this one... */ 3539 #ifdef DEBUG 3540 printf("Found: %s at %d\n", 3541 ac->devname,c); 3542 #endif 3543 break; 3544 } 3545 ac=ac->next; 3546 } 3547 if (ac==NULL) { 3548 /* Didn't find one here! */ 3549 /* special case for RAID 1, especially 3550 where there are more than 2 3551 components (where RAIDframe treats 3552 things a little differently :( ) */ 3553 if (parity_type == '1') { 3554 if (c%2 == 0) { /* even component */ 3555 even_pair_failed = 1; 3556 } else { /* odd component. If 3557 we're failed, and 3558 so is the even 3559 component, it's 3560 "Good Night, Charlie" */ 3561 if (even_pair_failed == 1) { 3562 return(0); 3563 } 3564 } 3565 } else { 3566 /* normal accounting */ 3567 num_missing++; 3568 } 3569 } 3570 if ((parity_type == '1') && (c%2 == 1)) { 3571 /* Just did an even component, and we didn't 3572 bail.. reset the even_pair_failed flag, 3573 and go on to the next component.... 
*/ 3574 even_pair_failed = 0; 3575 } 3576 } 3577 3578 clabel = cset->ac->clabel; 3579 3580 if (((clabel->parityConfig == '0') && (num_missing > 0)) || 3581 ((clabel->parityConfig == '4') && (num_missing > 1)) || 3582 ((clabel->parityConfig == '5') && (num_missing > 1))) { 3583 /* XXX this needs to be made *much* more general */ 3584 /* Too many failures */ 3585 return(0); 3586 } 3587 /* otherwise, all is well, and we've got enough to take a kick 3588 at autoconfiguring this set */ 3589 return(1); 3590 } 3591 3592 void 3593 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config, 3594 RF_Raid_t *raidPtr) 3595 { 3596 RF_ComponentLabel_t *clabel; 3597 int i; 3598 3599 clabel = ac->clabel; 3600 3601 /* 1. Fill in the common stuff */ 3602 config->numRow = clabel->num_rows = 1; 3603 config->numCol = clabel->num_columns; 3604 config->numSpare = 0; /* XXX should this be set here? */ 3605 config->sectPerSU = clabel->sectPerSU; 3606 config->SUsPerPU = clabel->SUsPerPU; 3607 config->SUsPerRU = clabel->SUsPerRU; 3608 config->parityConfig = clabel->parityConfig; 3609 /* XXX... */ 3610 strcpy(config->diskQueueType,"fifo"); 3611 config->maxOutstandingDiskReqs = clabel->maxOutstanding; 3612 config->layoutSpecificSize = 0; /* XXX ?? 
*/ 3613 3614 while(ac!=NULL) { 3615 /* row/col values will be in range due to the checks 3616 in reasonable_label() */ 3617 strcpy(config->devnames[0][ac->clabel->column], 3618 ac->devname); 3619 ac = ac->next; 3620 } 3621 3622 for(i=0;i<RF_MAXDBGV;i++) { 3623 config->debugVars[i][0] = 0; 3624 } 3625 } 3626 3627 int 3628 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value) 3629 { 3630 RF_ComponentLabel_t *clabel; 3631 int column; 3632 int sparecol; 3633 3634 raidPtr->autoconfigure = new_value; 3635 3636 for(column=0; column<raidPtr->numCol; column++) { 3637 if (raidPtr->Disks[column].status == rf_ds_optimal) { 3638 clabel = raidget_component_label(raidPtr, column); 3639 clabel->autoconfigure = new_value; 3640 raidflush_component_label(raidPtr, column); 3641 } 3642 } 3643 for(column = 0; column < raidPtr->numSpare ; column++) { 3644 sparecol = raidPtr->numCol + column; 3645 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3646 clabel = raidget_component_label(raidPtr, sparecol); 3647 clabel->autoconfigure = new_value; 3648 raidflush_component_label(raidPtr, sparecol); 3649 } 3650 } 3651 return(new_value); 3652 } 3653 3654 int 3655 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value) 3656 { 3657 RF_ComponentLabel_t *clabel; 3658 int column; 3659 int sparecol; 3660 3661 raidPtr->root_partition = new_value; 3662 for(column=0; column<raidPtr->numCol; column++) { 3663 if (raidPtr->Disks[column].status == rf_ds_optimal) { 3664 clabel = raidget_component_label(raidPtr, column); 3665 clabel->root_partition = new_value; 3666 raidflush_component_label(raidPtr, column); 3667 } 3668 } 3669 for(column = 0; column < raidPtr->numSpare ; column++) { 3670 sparecol = raidPtr->numCol + column; 3671 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3672 clabel = raidget_component_label(raidPtr, sparecol); 3673 clabel->root_partition = new_value; 3674 raidflush_component_label(raidPtr, sparecol); 3675 } 3676 } 3677 return(new_value); 3678 } 3679 3680 void 3681 
rf_release_all_vps(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;

	/* Close and release the vnode of every component in the set. */
	ac = cset->ac;
	while(ac!=NULL) {
		/* Close the vp, and give it back */
		if (ac->vp) {
			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(ac->vp, FREAD, NOCRED);
			vput(ac->vp);
			ac->vp = NULL;
		}
		ac = ac->next;
	}
}


/*
 * Free all memory associated with a config set: each member's label,
 * each AutoConfig entry, and finally the set itself.  Does NOT release
 * vnodes -- see rf_release_all_vps() for that.
 */
void
rf_cleanup_config_set(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *next_ac;

	ac = cset->ac;
	while(ac!=NULL) {
		next_ac = ac->next;
		/* nuke the label */
		free(ac->clabel, M_RAIDFRAME);
		/* cleanup the config structure */
		free(ac, M_RAIDFRAME);
		/* "next.." */
		ac = next_ac;
	}
	/* and, finally, nuke the config set */
	free(cset, M_RAIDFRAME);
}


/*
 * Populate a component label from the current state of the array:
 * version, serial/mod counters, geometry, and configuration flags.
 * The label is marked dirty and optimal; the caller flushes it out.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}

/*
 * Autoconfigure one config set: pick a raid unit (preferring the unit
 * it was last configured as), build an RF_Config_t, and configure the
 * array.  On success returns the softc (and records root eligibility
 * in cset->rootable); on failure returns NULL.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("Out of mem!?!?\n");
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	/* walk forward until we find an unconfigured unit */
	for (sc = raidget(raidID); sc->sc_r.valid != 0; sc = raidget(++raidID))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}

/*
 * Account a completed I/O against the disk statistics of the raid
 * device that issued it.
 */
void
rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
{
	struct buf *bp;
	struct raid_softc *rs;

	bp = (struct buf *)desc->bp;
	rs = desc->raidPtr->softc;
	disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
		    (bp->b_flags & B_READ));
}

/*
 * Convenience wrapper: initialize a pool at IPL_BIO, pre-allocate
 * xmin items, and set the low/high watermarks to xmin/xmax.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}

/*
 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
 * if there is IO pending and if that IO could possibly be done for a
 * given RAID set.  Returns 0 if IO is waiting and can be done, 1
 * otherwise.
3854 * 3855 */ 3856 3857 int 3858 rf_buf_queue_check(RF_Raid_t *raidPtr) 3859 { 3860 struct raid_softc *rs = raidPtr->softc; 3861 if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) { 3862 /* there is work to do */ 3863 return 0; 3864 } 3865 /* default is nothing to do */ 3866 return 1; 3867 } 3868 3869 int 3870 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr) 3871 { 3872 uint64_t numsecs; 3873 unsigned secsize; 3874 int error; 3875 3876 error = getdisksize(vp, &numsecs, &secsize); 3877 if (error == 0) { 3878 diskPtr->blockSize = secsize; 3879 diskPtr->numBlocks = numsecs - rf_protectedSectors; 3880 diskPtr->partitionSize = numsecs; 3881 return 0; 3882 } 3883 return error; 3884 } 3885 3886 static int 3887 raid_match(device_t self, cfdata_t cfdata, void *aux) 3888 { 3889 return 1; 3890 } 3891 3892 static void 3893 raid_attach(device_t parent, device_t self, void *aux) 3894 { 3895 3896 } 3897 3898 3899 static int 3900 raid_detach(device_t self, int flags) 3901 { 3902 int error; 3903 struct raid_softc *rs = raidget(device_unit(self)); 3904 3905 if (rs == NULL) 3906 return ENXIO; 3907 3908 if ((error = raidlock(rs)) != 0) 3909 return (error); 3910 3911 error = raid_detach_unlocked(rs); 3912 3913 raidunlock(rs); 3914 3915 /* XXXkd: raidput(rs) ??? */ 3916 3917 return error; 3918 } 3919 3920 static void 3921 rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr) 3922 { 3923 struct disk_geom *dg = &rs->sc_dkdev.dk_geom; 3924 3925 memset(dg, 0, sizeof(*dg)); 3926 3927 dg->dg_secperunit = raidPtr->totalSectors; 3928 dg->dg_secsize = raidPtr->bytesPerSector; 3929 dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe; 3930 dg->dg_ntracks = 4 * raidPtr->numCol; 3931 3932 disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL); 3933 } 3934 3935 /* 3936 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components. 3937 * We end up returning whatever error was returned by the first cache flush 3938 * that fails. 
3939 */ 3940 3941 int 3942 rf_sync_component_caches(RF_Raid_t *raidPtr) 3943 { 3944 int c, sparecol; 3945 int e,error; 3946 int force = 1; 3947 3948 error = 0; 3949 for (c = 0; c < raidPtr->numCol; c++) { 3950 if (raidPtr->Disks[c].status == rf_ds_optimal) { 3951 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC, 3952 &force, FWRITE, NOCRED); 3953 if (e) { 3954 if (e != ENODEV) 3955 printf("raid%d: cache flush to component %s failed.\n", 3956 raidPtr->raidid, raidPtr->Disks[c].devname); 3957 if (error == 0) { 3958 error = e; 3959 } 3960 } 3961 } 3962 } 3963 3964 for( c = 0; c < raidPtr->numSpare ; c++) { 3965 sparecol = raidPtr->numCol + c; 3966 /* Need to ensure that the reconstruct actually completed! */ 3967 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3968 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp, 3969 DIOCCACHESYNC, &force, FWRITE, NOCRED); 3970 if (e) { 3971 if (e != ENODEV) 3972 printf("raid%d: cache flush to component %s failed.\n", 3973 raidPtr->raidid, raidPtr->Disks[sparecol].devname); 3974 if (error == 0) { 3975 error = e; 3976 } 3977 } 3978 } 3979 } 3980 return error; 3981 } 3982