1 /* $NetBSD: rf_netbsdkintf.c,v 1.308 2014/04/03 18:55:26 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Greg Oster; Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 
40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 66 * 67 * @(#)cd.c 8.2 (Berkeley) 11/16/93 68 */ 69 70 /* 71 * Copyright (c) 1995 Carnegie-Mellon University. 72 * All rights reserved. 
73 * 74 * Authors: Mark Holland, Jim Zelenka 75 * 76 * Permission to use, copy, modify and distribute this software and 77 * its documentation is hereby granted, provided that both the copyright 78 * notice and this permission notice appear in all copies of the 79 * software, derivative works or modified versions, and any portions 80 * thereof, and that both notices appear in supporting documentation. 81 * 82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 85 * 86 * Carnegie Mellon requests users of this software to return to 87 * 88 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 89 * School of Computer Science 90 * Carnegie Mellon University 91 * Pittsburgh PA 15213-3890 92 * 93 * any improvements or extensions that they make and grant Carnegie the 94 * rights to redistribute these changes. 95 */ 96 97 /*********************************************************** 98 * 99 * rf_kintf.c -- the kernel interface routines for RAIDframe 100 * 101 ***********************************************************/ 102 103 #include <sys/cdefs.h> 104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.308 2014/04/03 18:55:26 christos Exp $"); 105 106 #ifdef _KERNEL_OPT 107 #include "opt_compat_netbsd.h" 108 #include "opt_raid_autoconfig.h" 109 #endif 110 111 #include <sys/param.h> 112 #include <sys/errno.h> 113 #include <sys/pool.h> 114 #include <sys/proc.h> 115 #include <sys/queue.h> 116 #include <sys/disk.h> 117 #include <sys/device.h> 118 #include <sys/stat.h> 119 #include <sys/ioctl.h> 120 #include <sys/fcntl.h> 121 #include <sys/systm.h> 122 #include <sys/vnode.h> 123 #include <sys/disklabel.h> 124 #include <sys/conf.h> 125 #include <sys/buf.h> 126 #include <sys/bufq.h> 127 #include <sys/reboot.h> 128 #include <sys/kauth.h> 129 130 #include <prop/proplib.h> 131 132 #include 
<dev/raidframe/raidframevar.h> 133 #include <dev/raidframe/raidframeio.h> 134 #include <dev/raidframe/rf_paritymap.h> 135 136 #include "rf_raid.h" 137 #include "rf_copyback.h" 138 #include "rf_dag.h" 139 #include "rf_dagflags.h" 140 #include "rf_desc.h" 141 #include "rf_diskqueue.h" 142 #include "rf_etimer.h" 143 #include "rf_general.h" 144 #include "rf_kintf.h" 145 #include "rf_options.h" 146 #include "rf_driver.h" 147 #include "rf_parityscan.h" 148 #include "rf_threadstuff.h" 149 150 #ifdef COMPAT_50 151 #include "rf_compat50.h" 152 #endif 153 154 #ifdef DEBUG 155 int rf_kdebug_level = 0; 156 #define db1_printf(a) if (rf_kdebug_level > 0) printf a 157 #else /* DEBUG */ 158 #define db1_printf(a) { } 159 #endif /* DEBUG */ 160 161 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 162 static rf_declare_mutex2(rf_sparet_wait_mutex); 163 static rf_declare_cond2(rf_sparet_wait_cv); 164 static rf_declare_cond2(rf_sparet_resp_cv); 165 166 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a 167 * spare table */ 168 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from 169 * installation process */ 170 #endif 171 172 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures"); 173 174 /* prototypes */ 175 static void KernelWakeupFunc(struct buf *); 176 static void InitBP(struct buf *, struct vnode *, unsigned, 177 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *), 178 void *, int, struct proc *); 179 struct raid_softc; 180 static void raidinit(struct raid_softc *); 181 182 void raidattach(int); 183 static int raid_match(device_t, cfdata_t, void *); 184 static void raid_attach(device_t, device_t, void *); 185 static int raid_detach(device_t, int); 186 187 static int raidread_component_area(dev_t, struct vnode *, void *, size_t, 188 daddr_t, daddr_t); 189 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t, 190 daddr_t, daddr_t, int); 191 192 static int raidwrite_component_label(unsigned, 193 dev_t, 
struct vnode *, RF_ComponentLabel_t *);
static int raidread_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);


/* Device entry points; the functions themselves are defined further down. */
dev_type_open(raidopen);
dev_type_close(raidclose);
dev_type_read(raidread);
dev_type_write(raidwrite);
dev_type_ioctl(raidioctl);
dev_type_strategy(raidstrategy);
dev_type_dump(raiddump);
dev_type_size(raidsize);

/* Block device switch entry for the raid driver. */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_flag = D_DISK
};

/* Character (raw) device switch entry for the raid driver. */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_flag = D_DISK
};

static struct dkdriver rf_dkdriver = { raidstrategy, minphys };

/*
 * Per-unit driver state.  Instances are allocated by raidcreate() and
 * kept on the global `raids' list, keyed by sc_unit.
 */
struct raid_softc {
	device_t sc_dev;	/* autoconf device handle for this unit */
	int	sc_unit;	/* unit number (lookup key in raidget()) */
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
	RF_Raid_t sc_r;		/* RAIDframe descriptor for this unit */
	LIST_ENTRY(raid_softc) sc_link;	/* linkage on the `raids' list */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

/* Map a dev_t to its raid unit number. */
#define	raidunit(x)	DISKUNIT(x)

extern struct cfdriver raid_cd;
CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
    raid_match, raid_attach,
raid_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/* Build the dev_t of the raw partition of the unit that `dev' belongs to. */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */

/* Disklabel helpers. */
static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    struct disklabel *);
static void raidgetdisklabel(dev_t);
static void raidmakedisklabel(struct raid_softc *);

/* Per-unit lock used by open/close/dump/ioctl. */
static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static int raid_detach_unlocked(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);

void rf_ReconThread(struct rf_recon_req *);
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req *);
int rf_autoconfig(device_t);
void rf_buildroothack(RF_ConfigSet_t *);

/* Autoconfiguration helpers. */
RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);

/*
 * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
 * Note that this is overridden by having RAID_AUTOCONFIG as an option
 * in the kernel config file.
323 */ 324 #ifdef RAID_AUTOCONFIG 325 int raidautoconfig = 1; 326 #else 327 int raidautoconfig = 0; 328 #endif 329 static bool raidautoconfigdone = false; 330 331 struct RF_Pools_s rf_pools; 332 333 static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids); 334 static kmutex_t raid_lock; 335 336 static struct raid_softc * 337 raidcreate(int unit) { 338 struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP); 339 if (sc == NULL) { 340 #ifdef DIAGNOSTIC 341 printf("%s: out of memory\n", __func__); 342 #endif 343 return NULL; 344 } 345 sc->sc_unit = unit; 346 bufq_alloc(&sc->buf_queue, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK); 347 return sc; 348 } 349 350 static void 351 raiddestroy(struct raid_softc *sc) { 352 bufq_free(sc->buf_queue); 353 kmem_free(sc, sizeof(*sc)); 354 } 355 356 static struct raid_softc * 357 raidget(int unit) { 358 struct raid_softc *sc; 359 if (unit < 0) { 360 #ifdef DIAGNOSTIC 361 panic("%s: unit %d!", __func__, unit); 362 #endif 363 return NULL; 364 } 365 mutex_enter(&raid_lock); 366 LIST_FOREACH(sc, &raids, sc_link) { 367 if (sc->sc_unit == unit) { 368 mutex_exit(&raid_lock); 369 return sc; 370 } 371 } 372 mutex_exit(&raid_lock); 373 if ((sc = raidcreate(unit)) == NULL) 374 return NULL; 375 mutex_enter(&raid_lock); 376 LIST_INSERT_HEAD(&raids, sc, sc_link); 377 mutex_exit(&raid_lock); 378 return sc; 379 } 380 381 static void 382 raidput(struct raid_softc *sc) { 383 mutex_enter(&raid_lock); 384 LIST_REMOVE(sc, sc_link); 385 mutex_exit(&raid_lock); 386 raiddestroy(sc); 387 } 388 389 void 390 raidattach(int num) 391 { 392 mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE); 393 /* This is where all the initialization stuff gets done. 
*/ 394 395 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 396 rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM); 397 rf_init_cond2(rf_sparet_wait_cv, "sparetw"); 398 rf_init_cond2(rf_sparet_resp_cv, "rfgst"); 399 400 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; 401 #endif 402 403 if (rf_BootRaidframe() == 0) 404 aprint_verbose("Kernelized RAIDframe activated\n"); 405 else 406 panic("Serious error booting RAID!!"); 407 408 if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) { 409 aprint_error("raidattach: config_cfattach_attach failed?\n"); 410 } 411 412 raidautoconfigdone = false; 413 414 /* 415 * Register a finalizer which will be used to auto-config RAID 416 * sets once all real hardware devices have been found. 417 */ 418 if (config_finalize_register(NULL, rf_autoconfig) != 0) 419 aprint_error("WARNING: unable to register RAIDframe finalizer\n"); 420 } 421 422 int 423 rf_autoconfig(device_t self) 424 { 425 RF_AutoConfig_t *ac_list; 426 RF_ConfigSet_t *config_sets; 427 428 if (!raidautoconfig || raidautoconfigdone == true) 429 return (0); 430 431 /* XXX This code can only be run once. */ 432 raidautoconfigdone = true; 433 434 #ifdef __HAVE_CPU_BOOTCONF 435 /* 436 * 0. find the boot device if needed first so we can use it later 437 * this needs to be done before we autoconfigure any raid sets, 438 * because if we use wedges we are not going to be able to open 439 * the boot device later 440 */ 441 if (booted_device == NULL) 442 cpu_bootconf(); 443 #endif 444 /* 1. locate all RAID components on the system */ 445 aprint_debug("Searching for RAID components...\n"); 446 ac_list = rf_find_raid_components(); 447 448 /* 2. Sort them into their respective sets. */ 449 config_sets = rf_create_auto_sets(ac_list); 450 451 /* 452 * 3. Evaluate each set and configure the valid ones. 453 * This gets done in rf_buildroothack(). 
454 */ 455 rf_buildroothack(config_sets); 456 457 return 1; 458 } 459 460 static int 461 rf_containsboot(RF_Raid_t *r, device_t bdv) { 462 const char *bootname = device_xname(bdv); 463 size_t len = strlen(bootname); 464 465 for (int col = 0; col < r->numCol; col++) { 466 const char *devname = r->Disks[col].devname; 467 devname += sizeof("/dev/") - 1; 468 if (strncmp(devname, "dk", 2) == 0) { 469 const char *parent = 470 dkwedge_get_parent_name(r->Disks[col].dev); 471 if (parent != NULL) 472 devname = parent; 473 } 474 if (strncmp(devname, bootname, len) == 0) { 475 struct raid_softc *sc = r->softc; 476 aprint_debug("raid%d includes boot device %s\n", 477 sc->sc_unit, devname); 478 return 1; 479 } 480 } 481 return 0; 482 } 483 484 void 485 rf_buildroothack(RF_ConfigSet_t *config_sets) 486 { 487 RF_ConfigSet_t *cset; 488 RF_ConfigSet_t *next_cset; 489 int num_root; 490 struct raid_softc *sc, *rsc; 491 492 sc = rsc = NULL; 493 num_root = 0; 494 cset = config_sets; 495 while (cset != NULL) { 496 next_cset = cset->next; 497 if (rf_have_enough_components(cset) && 498 cset->ac->clabel->autoconfigure == 1) { 499 sc = rf_auto_config_set(cset); 500 if (sc != NULL) { 501 aprint_debug("raid%d: configured ok\n", 502 sc->sc_unit); 503 if (cset->rootable) { 504 rsc = sc; 505 num_root++; 506 } 507 } else { 508 /* The autoconfig didn't work :( */ 509 aprint_debug("Autoconfig failed\n"); 510 rf_release_all_vps(cset); 511 } 512 } else { 513 /* we're not autoconfiguring this set... 514 release the associated resources */ 515 rf_release_all_vps(cset); 516 } 517 /* cleanup */ 518 rf_cleanup_config_set(cset); 519 cset = next_cset; 520 } 521 522 /* if the user has specified what the root device should be 523 then we don't touch booted_device or boothowto... */ 524 525 if (rootspec != NULL) 526 return; 527 528 /* we found something bootable... 
*/ 529 530 if (num_root == 1) { 531 device_t candidate_root; 532 if (rsc->sc_dkdev.dk_nwedges != 0) { 533 /* XXX: How do we find the real root partition? */ 534 char cname[sizeof(cset->ac->devname)]; 535 snprintf(cname, sizeof(cname), "%s%c", 536 device_xname(rsc->sc_dev), 'a'); 537 candidate_root = dkwedge_find_by_wname(cname); 538 } else 539 candidate_root = rsc->sc_dev; 540 if (booted_device == NULL || 541 rsc->sc_r.root_partition == 1 || 542 rf_containsboot(&rsc->sc_r, booted_device)) 543 booted_device = candidate_root; 544 } else if (num_root > 1) { 545 546 /* 547 * Maybe the MD code can help. If it cannot, then 548 * setroot() will discover that we have no 549 * booted_device and will ask the user if nothing was 550 * hardwired in the kernel config file 551 */ 552 if (booted_device == NULL) 553 return; 554 555 num_root = 0; 556 mutex_enter(&raid_lock); 557 LIST_FOREACH(sc, &raids, sc_link) { 558 RF_Raid_t *r = &sc->sc_r; 559 if (r->valid == 0) 560 continue; 561 562 if (r->root_partition == 0) 563 continue; 564 565 if (rf_containsboot(r, booted_device)) { 566 num_root++; 567 rsc = sc; 568 } 569 } 570 mutex_exit(&raid_lock); 571 572 if (num_root == 1) { 573 booted_device = rsc->sc_dev; 574 } else { 575 /* we can't guess.. require the user to answer... 
*/ 576 boothowto |= RB_ASKNAME; 577 } 578 } 579 } 580 581 582 int 583 raidsize(dev_t dev) 584 { 585 struct raid_softc *rs; 586 struct disklabel *lp; 587 int part, unit, omask, size; 588 589 unit = raidunit(dev); 590 if ((rs = raidget(unit)) == NULL) 591 return -1; 592 if ((rs->sc_flags & RAIDF_INITED) == 0) 593 return (-1); 594 595 part = DISKPART(dev); 596 omask = rs->sc_dkdev.dk_openmask & (1 << part); 597 lp = rs->sc_dkdev.dk_label; 598 599 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp)) 600 return (-1); 601 602 if (lp->d_partitions[part].p_fstype != FS_SWAP) 603 size = -1; 604 else 605 size = lp->d_partitions[part].p_size * 606 (lp->d_secsize / DEV_BSIZE); 607 608 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp)) 609 return (-1); 610 611 return (size); 612 613 } 614 615 int 616 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size) 617 { 618 int unit = raidunit(dev); 619 struct raid_softc *rs; 620 const struct bdevsw *bdev; 621 struct disklabel *lp; 622 RF_Raid_t *raidPtr; 623 daddr_t offset; 624 int part, c, sparecol, j, scol, dumpto; 625 int error = 0; 626 627 if ((rs = raidget(unit)) == NULL) 628 return ENXIO; 629 630 raidPtr = &rs->sc_r; 631 632 if ((rs->sc_flags & RAIDF_INITED) == 0) 633 return ENXIO; 634 635 /* we only support dumping to RAID 1 sets */ 636 if (raidPtr->Layout.numDataCol != 1 || 637 raidPtr->Layout.numParityCol != 1) 638 return EINVAL; 639 640 641 if ((error = raidlock(rs)) != 0) 642 return error; 643 644 if (size % DEV_BSIZE != 0) { 645 error = EINVAL; 646 goto out; 647 } 648 649 if (blkno + size / DEV_BSIZE > rs->sc_size) { 650 printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > " 651 "sc->sc_size (%" PRIu64 ")\n", __func__, blkno, 652 size / DEV_BSIZE, rs->sc_size); 653 error = EINVAL; 654 goto out; 655 } 656 657 part = DISKPART(dev); 658 lp = rs->sc_dkdev.dk_label; 659 offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS; 660 661 /* figure out what device is alive.. 
*/ 662 663 /* 664 Look for a component to dump to. The preference for the 665 component to dump to is as follows: 666 1) the master 667 2) a used_spare of the master 668 3) the slave 669 4) a used_spare of the slave 670 */ 671 672 dumpto = -1; 673 for (c = 0; c < raidPtr->numCol; c++) { 674 if (raidPtr->Disks[c].status == rf_ds_optimal) { 675 /* this might be the one */ 676 dumpto = c; 677 break; 678 } 679 } 680 681 /* 682 At this point we have possibly selected a live master or a 683 live slave. We now check to see if there is a spared 684 master (or a spared slave), if we didn't find a live master 685 or a live slave. 686 */ 687 688 for (c = 0; c < raidPtr->numSpare; c++) { 689 sparecol = raidPtr->numCol + c; 690 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 691 /* How about this one? */ 692 scol = -1; 693 for(j=0;j<raidPtr->numCol;j++) { 694 if (raidPtr->Disks[j].spareCol == sparecol) { 695 scol = j; 696 break; 697 } 698 } 699 if (scol == 0) { 700 /* 701 We must have found a spared master! 702 We'll take that over anything else 703 found so far. (We couldn't have 704 found a real master before, since 705 this is a used spare, and it's 706 saying that it's replacing the 707 master.) On reboot (with 708 autoconfiguration turned on) 709 sparecol will become the 1st 710 component (component0) of this set. 711 */ 712 dumpto = sparecol; 713 break; 714 } else if (scol != -1) { 715 /* 716 Must be a spared slave. We'll dump 717 to that if we havn't found anything 718 else so far. 719 */ 720 if (dumpto == -1) 721 dumpto = sparecol; 722 } 723 } 724 } 725 726 if (dumpto == -1) { 727 /* we couldn't find any live components to dump to!?!? 728 */ 729 error = EINVAL; 730 goto out; 731 } 732 733 bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev); 734 735 /* 736 Note that blkno is relative to this particular partition. 
By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	*/

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
/*
 * Open entry point for both the block and character device.
 *
 * Fails with ENXIO if the unit cannot be obtained or the requested
 * non-raw partition does not exist / the unit is not initialized, with
 * EBUSY if the unit is shutting down or has wedges and a non-raw
 * partition is requested, or with the error from raidlock().  On
 * success the partition's bit is recorded in the char or block open
 * mask according to `fmt'.  `flags' and `l' are unused.
 */
/* ARGSUSED */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* first open of an initialized unit: read the disklabel */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	/*
	 * dk_openmask is only recomputed further down, so it still holds
	 * the state from before this open: zero means this is the first
	 * opener.
	 */
	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
/*
 * Close entry point for both the block and character device: clears the
 * partition's bit in the open mask selected by `fmt' and, when that was
 * the last open of an initialized unit, writes the final component label
 * update.  `flags' and `l' are unused.
 */
/* ARGSUSED */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	int     error = 0;
	int     part;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(&rs->sc_r,
						 RF_FINAL_COMPONENT_UPDATE);

		/* If the kernel is shutting down, it will detach
		 * this RAID set soon enough.
877 */ 878 } 879 880 raidunlock(rs); 881 return (0); 882 883 } 884 885 void 886 raidstrategy(struct buf *bp) 887 { 888 unsigned int unit = raidunit(bp->b_dev); 889 RF_Raid_t *raidPtr; 890 int wlabel; 891 struct raid_softc *rs; 892 893 if ((rs = raidget(unit)) == NULL) { 894 bp->b_error = ENXIO; 895 goto done; 896 } 897 if ((rs->sc_flags & RAIDF_INITED) == 0) { 898 bp->b_error = ENXIO; 899 goto done; 900 } 901 raidPtr = &rs->sc_r; 902 if (!raidPtr->valid) { 903 bp->b_error = ENODEV; 904 goto done; 905 } 906 if (bp->b_bcount == 0) { 907 db1_printf(("b_bcount is zero..\n")); 908 goto done; 909 } 910 911 /* 912 * Do bounds checking and adjust transfer. If there's an 913 * error, the bounds check will flag that for us. 914 */ 915 916 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING); 917 if (DISKPART(bp->b_dev) == RAW_PART) { 918 uint64_t size; /* device size in DEV_BSIZE unit */ 919 920 if (raidPtr->logBytesPerSector > DEV_BSHIFT) { 921 size = raidPtr->totalSectors << 922 (raidPtr->logBytesPerSector - DEV_BSHIFT); 923 } else { 924 size = raidPtr->totalSectors >> 925 (DEV_BSHIFT - raidPtr->logBytesPerSector); 926 } 927 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) { 928 goto done; 929 } 930 } else { 931 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) { 932 db1_printf(("Bounds check failed!!:%d %d\n", 933 (int) bp->b_blkno, (int) wlabel)); 934 goto done; 935 } 936 } 937 938 rf_lock_mutex2(raidPtr->iodone_lock); 939 940 bp->b_resid = 0; 941 942 /* stuff it onto our queue */ 943 bufq_put(rs->buf_queue, bp); 944 945 /* scheduled the IO to happen at the next convenient time */ 946 rf_signal_cond2(raidPtr->iodone_cv); 947 rf_unlock_mutex2(raidPtr->iodone_lock); 948 949 return; 950 951 done: 952 bp->b_resid = bp->b_bcount; 953 biodone(bp); 954 } 955 /* ARGSUSED */ 956 int 957 raidread(dev_t dev, struct uio *uio, int flags) 958 { 959 int unit = raidunit(dev); 960 struct raid_softc *rs; 961 962 if ((rs = raidget(unit)) == NULL) 963 return ENXIO; 
964 965 if ((rs->sc_flags & RAIDF_INITED) == 0) 966 return (ENXIO); 967 968 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio)); 969 970 } 971 /* ARGSUSED */ 972 int 973 raidwrite(dev_t dev, struct uio *uio, int flags) 974 { 975 int unit = raidunit(dev); 976 struct raid_softc *rs; 977 978 if ((rs = raidget(unit)) == NULL) 979 return ENXIO; 980 981 if ((rs->sc_flags & RAIDF_INITED) == 0) 982 return (ENXIO); 983 984 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio)); 985 986 } 987 988 static int 989 raid_detach_unlocked(struct raid_softc *rs) 990 { 991 int error; 992 RF_Raid_t *raidPtr; 993 994 raidPtr = &rs->sc_r; 995 996 /* 997 * If somebody has a partition mounted, we shouldn't 998 * shutdown. 999 */ 1000 if (rs->sc_dkdev.dk_openmask != 0) 1001 return EBUSY; 1002 1003 if ((rs->sc_flags & RAIDF_INITED) == 0) 1004 ; /* not initialized: nothing to do */ 1005 else if ((error = rf_Shutdown(raidPtr)) != 0) 1006 return error; 1007 else 1008 rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN); 1009 1010 /* Detach the disk. 
*/ 1011 dkwedge_delall(&rs->sc_dkdev); 1012 disk_detach(&rs->sc_dkdev); 1013 disk_destroy(&rs->sc_dkdev); 1014 1015 aprint_normal_dev(rs->sc_dev, "detached\n"); 1016 1017 return 0; 1018 } 1019 1020 int 1021 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 1022 { 1023 int unit = raidunit(dev); 1024 int error = 0; 1025 int part, pmask, s; 1026 cfdata_t cf; 1027 struct raid_softc *rs; 1028 RF_Config_t *k_cfg, *u_cfg; 1029 RF_Raid_t *raidPtr; 1030 RF_RaidDisk_t *diskPtr; 1031 RF_AccTotals_t *totals; 1032 RF_DeviceConfig_t *d_cfg, **ucfgp; 1033 u_char *specific_buf; 1034 int retcode = 0; 1035 int column; 1036 /* int raidid; */ 1037 struct rf_recon_req *rrcopy, *rr; 1038 RF_ComponentLabel_t *clabel; 1039 RF_ComponentLabel_t *ci_label; 1040 RF_ComponentLabel_t **clabel_ptr; 1041 RF_SingleComponent_t *sparePtr,*componentPtr; 1042 RF_SingleComponent_t component; 1043 RF_ProgressInfo_t progressInfo, **progressInfoPtr; 1044 int i, j, d; 1045 #ifdef __HAVE_OLD_DISKLABEL 1046 struct disklabel newlabel; 1047 #endif 1048 struct dkwedge_info *dkw; 1049 1050 if ((rs = raidget(unit)) == NULL) 1051 return ENXIO; 1052 raidPtr = &rs->sc_r; 1053 1054 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev, 1055 (int) DISKPART(dev), (int) unit, cmd)); 1056 1057 /* Must be open for writes for these commands... */ 1058 switch (cmd) { 1059 #ifdef DIOCGSECTORSIZE 1060 case DIOCGSECTORSIZE: 1061 *(u_int *)data = raidPtr->bytesPerSector; 1062 return 0; 1063 case DIOCGMEDIASIZE: 1064 *(off_t *)data = 1065 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector; 1066 return 0; 1067 #endif 1068 case DIOCSDINFO: 1069 case DIOCWDINFO: 1070 #ifdef __HAVE_OLD_DISKLABEL 1071 case ODIOCWDINFO: 1072 case ODIOCSDINFO: 1073 #endif 1074 case DIOCWLABEL: 1075 case DIOCAWEDGE: 1076 case DIOCDWEDGE: 1077 case DIOCSSTRATEGY: 1078 if ((flag & FWRITE) == 0) 1079 return (EBADF); 1080 } 1081 1082 /* Must be initialized for these... 
*/ 1083 switch (cmd) { 1084 case DIOCGDINFO: 1085 case DIOCSDINFO: 1086 case DIOCWDINFO: 1087 #ifdef __HAVE_OLD_DISKLABEL 1088 case ODIOCGDINFO: 1089 case ODIOCWDINFO: 1090 case ODIOCSDINFO: 1091 case ODIOCGDEFLABEL: 1092 #endif 1093 case DIOCGPART: 1094 case DIOCWLABEL: 1095 case DIOCGDEFLABEL: 1096 case DIOCAWEDGE: 1097 case DIOCDWEDGE: 1098 case DIOCLWEDGES: 1099 case DIOCCACHESYNC: 1100 case RAIDFRAME_SHUTDOWN: 1101 case RAIDFRAME_REWRITEPARITY: 1102 case RAIDFRAME_GET_INFO: 1103 case RAIDFRAME_RESET_ACCTOTALS: 1104 case RAIDFRAME_GET_ACCTOTALS: 1105 case RAIDFRAME_KEEP_ACCTOTALS: 1106 case RAIDFRAME_GET_SIZE: 1107 case RAIDFRAME_FAIL_DISK: 1108 case RAIDFRAME_COPYBACK: 1109 case RAIDFRAME_CHECK_RECON_STATUS: 1110 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1111 case RAIDFRAME_GET_COMPONENT_LABEL: 1112 case RAIDFRAME_SET_COMPONENT_LABEL: 1113 case RAIDFRAME_ADD_HOT_SPARE: 1114 case RAIDFRAME_REMOVE_HOT_SPARE: 1115 case RAIDFRAME_INIT_LABELS: 1116 case RAIDFRAME_REBUILD_IN_PLACE: 1117 case RAIDFRAME_CHECK_PARITY: 1118 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1119 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1120 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1121 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1122 case RAIDFRAME_SET_AUTOCONFIG: 1123 case RAIDFRAME_SET_ROOT: 1124 case RAIDFRAME_DELETE_COMPONENT: 1125 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1126 case RAIDFRAME_PARITYMAP_STATUS: 1127 case RAIDFRAME_PARITYMAP_GET_DISABLE: 1128 case RAIDFRAME_PARITYMAP_SET_DISABLE: 1129 case RAIDFRAME_PARITYMAP_SET_PARAMS: 1130 case DIOCGSTRATEGY: 1131 case DIOCSSTRATEGY: 1132 if ((rs->sc_flags & RAIDF_INITED) == 0) 1133 return (ENXIO); 1134 } 1135 1136 switch (cmd) { 1137 #ifdef COMPAT_50 1138 case RAIDFRAME_GET_INFO50: 1139 return rf_get_info50(raidPtr, data); 1140 1141 case RAIDFRAME_CONFIGURE50: 1142 if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0) 1143 return retcode; 1144 goto config; 1145 #endif 1146 /* configure the system */ 1147 case RAIDFRAME_CONFIGURE: 1148 
1149 if (raidPtr->valid) { 1150 /* There is a valid RAID set running on this unit! */ 1151 printf("raid%d: Device already configured!\n",unit); 1152 return(EINVAL); 1153 } 1154 1155 /* copy-in the configuration information */ 1156 /* data points to a pointer to the configuration structure */ 1157 1158 u_cfg = *((RF_Config_t **) data); 1159 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *)); 1160 if (k_cfg == NULL) { 1161 return (ENOMEM); 1162 } 1163 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t)); 1164 if (retcode) { 1165 RF_Free(k_cfg, sizeof(RF_Config_t)); 1166 db1_printf(("rf_ioctl: retcode=%d copyin.1\n", 1167 retcode)); 1168 return (retcode); 1169 } 1170 goto config; 1171 config: 1172 /* allocate a buffer for the layout-specific data, and copy it 1173 * in */ 1174 if (k_cfg->layoutSpecificSize) { 1175 if (k_cfg->layoutSpecificSize > 10000) { 1176 /* sanity check */ 1177 RF_Free(k_cfg, sizeof(RF_Config_t)); 1178 return (EINVAL); 1179 } 1180 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, 1181 (u_char *)); 1182 if (specific_buf == NULL) { 1183 RF_Free(k_cfg, sizeof(RF_Config_t)); 1184 return (ENOMEM); 1185 } 1186 retcode = copyin(k_cfg->layoutSpecific, specific_buf, 1187 k_cfg->layoutSpecificSize); 1188 if (retcode) { 1189 RF_Free(k_cfg, sizeof(RF_Config_t)); 1190 RF_Free(specific_buf, 1191 k_cfg->layoutSpecificSize); 1192 db1_printf(("rf_ioctl: retcode=%d copyin.2\n", 1193 retcode)); 1194 return (retcode); 1195 } 1196 } else 1197 specific_buf = NULL; 1198 k_cfg->layoutSpecific = specific_buf; 1199 1200 /* should do some kind of sanity check on the configuration. 1201 * Store the sum of all the bytes in the last byte? 
*/ 1202 1203 /* configure the system */ 1204 1205 /* 1206 * Clear the entire RAID descriptor, just to make sure 1207 * there is no stale data left in the case of a 1208 * reconfiguration 1209 */ 1210 memset(raidPtr, 0, sizeof(*raidPtr)); 1211 raidPtr->softc = rs; 1212 raidPtr->raidid = unit; 1213 1214 retcode = rf_Configure(raidPtr, k_cfg, NULL); 1215 1216 if (retcode == 0) { 1217 1218 /* allow this many simultaneous IO's to 1219 this RAID device */ 1220 raidPtr->openings = RAIDOUTSTANDING; 1221 1222 raidinit(rs); 1223 rf_markalldirty(raidPtr); 1224 } 1225 /* free the buffers. No return code here. */ 1226 if (k_cfg->layoutSpecificSize) { 1227 RF_Free(specific_buf, k_cfg->layoutSpecificSize); 1228 } 1229 RF_Free(k_cfg, sizeof(RF_Config_t)); 1230 1231 return (retcode); 1232 1233 /* shutdown the system */ 1234 case RAIDFRAME_SHUTDOWN: 1235 1236 part = DISKPART(dev); 1237 pmask = (1 << part); 1238 1239 if ((error = raidlock(rs)) != 0) 1240 return (error); 1241 1242 if ((rs->sc_dkdev.dk_openmask & ~pmask) || 1243 ((rs->sc_dkdev.dk_bopenmask & pmask) && 1244 (rs->sc_dkdev.dk_copenmask & pmask))) 1245 retcode = EBUSY; 1246 else { 1247 rs->sc_flags |= RAIDF_SHUTDOWN; 1248 rs->sc_dkdev.dk_copenmask &= ~pmask; 1249 rs->sc_dkdev.dk_bopenmask &= ~pmask; 1250 rs->sc_dkdev.dk_openmask &= ~pmask; 1251 retcode = 0; 1252 } 1253 1254 raidunlock(rs); 1255 1256 if (retcode != 0) 1257 return retcode; 1258 1259 /* free the pseudo device attach bits */ 1260 1261 cf = device_cfdata(rs->sc_dev); 1262 if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0) 1263 free(cf, M_RAIDFRAME); 1264 1265 return (retcode); 1266 case RAIDFRAME_GET_COMPONENT_LABEL: 1267 clabel_ptr = (RF_ComponentLabel_t **) data; 1268 /* need to read the component label for the disk indicated 1269 by row,column in clabel */ 1270 1271 /* 1272 * Perhaps there should be an option to skip the in-core 1273 * copy and hit the disk, as with disklabel(8). 
1274 */ 1275 RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *)); 1276 1277 retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel)); 1278 1279 if (retcode) { 1280 RF_Free(clabel, sizeof(*clabel)); 1281 return retcode; 1282 } 1283 1284 clabel->row = 0; /* Don't allow looking at anything else.*/ 1285 1286 column = clabel->column; 1287 1288 if ((column < 0) || (column >= raidPtr->numCol + 1289 raidPtr->numSpare)) { 1290 RF_Free(clabel, sizeof(*clabel)); 1291 return EINVAL; 1292 } 1293 1294 RF_Free(clabel, sizeof(*clabel)); 1295 1296 clabel = raidget_component_label(raidPtr, column); 1297 1298 return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr)); 1299 1300 #if 0 1301 case RAIDFRAME_SET_COMPONENT_LABEL: 1302 clabel = (RF_ComponentLabel_t *) data; 1303 1304 /* XXX check the label for valid stuff... */ 1305 /* Note that some things *should not* get modified -- 1306 the user should be re-initing the labels instead of 1307 trying to patch things. 1308 */ 1309 1310 raidid = raidPtr->raidid; 1311 #ifdef DEBUG 1312 printf("raid%d: Got component label:\n", raidid); 1313 printf("raid%d: Version: %d\n", raidid, clabel->version); 1314 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number); 1315 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter); 1316 printf("raid%d: Column: %d\n", raidid, clabel->column); 1317 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns); 1318 printf("raid%d: Clean: %d\n", raidid, clabel->clean); 1319 printf("raid%d: Status: %d\n", raidid, clabel->status); 1320 #endif 1321 clabel->row = 0; 1322 column = clabel->column; 1323 1324 if ((column < 0) || (column >= raidPtr->numCol)) { 1325 return(EINVAL); 1326 } 1327 1328 /* XXX this isn't allowed to do anything for now :-) */ 1329 1330 /* XXX and before it is, we need to fill in the rest 1331 of the fields!?!?!?! 
*/ 1332 memcpy(raidget_component_label(raidPtr, column), 1333 clabel, sizeof(*clabel)); 1334 raidflush_component_label(raidPtr, column); 1335 return (0); 1336 #endif 1337 1338 case RAIDFRAME_INIT_LABELS: 1339 clabel = (RF_ComponentLabel_t *) data; 1340 /* 1341 we only want the serial number from 1342 the above. We get all the rest of the information 1343 from the config that was used to create this RAID 1344 set. 1345 */ 1346 1347 raidPtr->serial_number = clabel->serial_number; 1348 1349 for(column=0;column<raidPtr->numCol;column++) { 1350 diskPtr = &raidPtr->Disks[column]; 1351 if (!RF_DEAD_DISK(diskPtr->status)) { 1352 ci_label = raidget_component_label(raidPtr, 1353 column); 1354 /* Zeroing this is important. */ 1355 memset(ci_label, 0, sizeof(*ci_label)); 1356 raid_init_component_label(raidPtr, ci_label); 1357 ci_label->serial_number = 1358 raidPtr->serial_number; 1359 ci_label->row = 0; /* we dont' pretend to support more */ 1360 rf_component_label_set_partitionsize(ci_label, 1361 diskPtr->partitionSize); 1362 ci_label->column = column; 1363 raidflush_component_label(raidPtr, column); 1364 } 1365 /* XXXjld what about the spares? */ 1366 } 1367 1368 return (retcode); 1369 case RAIDFRAME_SET_AUTOCONFIG: 1370 d = rf_set_autoconfig(raidPtr, *(int *) data); 1371 printf("raid%d: New autoconfig value is: %d\n", 1372 raidPtr->raidid, d); 1373 *(int *) data = d; 1374 return (retcode); 1375 1376 case RAIDFRAME_SET_ROOT: 1377 d = rf_set_rootpartition(raidPtr, *(int *) data); 1378 printf("raid%d: New rootpartition value is: %d\n", 1379 raidPtr->raidid, d); 1380 *(int *) data = d; 1381 return (retcode); 1382 1383 /* initialize all parity */ 1384 case RAIDFRAME_REWRITEPARITY: 1385 1386 if (raidPtr->Layout.map->faultsTolerated == 0) { 1387 /* Parity for RAID 0 is trivially correct */ 1388 raidPtr->parity_good = RF_RAID_CLEAN; 1389 return(0); 1390 } 1391 1392 if (raidPtr->parity_rewrite_in_progress == 1) { 1393 /* Re-write is already in progress! 
*/ 1394 return(EINVAL); 1395 } 1396 1397 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread, 1398 rf_RewriteParityThread, 1399 raidPtr,"raid_parity"); 1400 return (retcode); 1401 1402 1403 case RAIDFRAME_ADD_HOT_SPARE: 1404 sparePtr = (RF_SingleComponent_t *) data; 1405 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t)); 1406 retcode = rf_add_hot_spare(raidPtr, &component); 1407 return(retcode); 1408 1409 case RAIDFRAME_REMOVE_HOT_SPARE: 1410 return(retcode); 1411 1412 case RAIDFRAME_DELETE_COMPONENT: 1413 componentPtr = (RF_SingleComponent_t *)data; 1414 memcpy( &component, componentPtr, 1415 sizeof(RF_SingleComponent_t)); 1416 retcode = rf_delete_component(raidPtr, &component); 1417 return(retcode); 1418 1419 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1420 componentPtr = (RF_SingleComponent_t *)data; 1421 memcpy( &component, componentPtr, 1422 sizeof(RF_SingleComponent_t)); 1423 retcode = rf_incorporate_hot_spare(raidPtr, &component); 1424 return(retcode); 1425 1426 case RAIDFRAME_REBUILD_IN_PLACE: 1427 1428 if (raidPtr->Layout.map->faultsTolerated == 0) { 1429 /* Can't do this on a RAID 0!! */ 1430 return(EINVAL); 1431 } 1432 1433 if (raidPtr->recon_in_progress == 1) { 1434 /* a reconstruct is already in progress! */ 1435 return(EINVAL); 1436 } 1437 1438 componentPtr = (RF_SingleComponent_t *) data; 1439 memcpy( &component, componentPtr, 1440 sizeof(RF_SingleComponent_t)); 1441 component.row = 0; /* we don't support any more */ 1442 column = component.column; 1443 1444 if ((column < 0) || (column >= raidPtr->numCol)) { 1445 return(EINVAL); 1446 } 1447 1448 rf_lock_mutex2(raidPtr->mutex); 1449 if ((raidPtr->Disks[column].status == rf_ds_optimal) && 1450 (raidPtr->numFailures > 0)) { 1451 /* XXX 0 above shouldn't be constant!!! */ 1452 /* some component other than this has failed. 1453 Let's not make things worse than they already 1454 are... 
*/ 1455 printf("raid%d: Unable to reconstruct to disk at:\n", 1456 raidPtr->raidid); 1457 printf("raid%d: Col: %d Too many failures.\n", 1458 raidPtr->raidid, column); 1459 rf_unlock_mutex2(raidPtr->mutex); 1460 return (EINVAL); 1461 } 1462 if (raidPtr->Disks[column].status == 1463 rf_ds_reconstructing) { 1464 printf("raid%d: Unable to reconstruct to disk at:\n", 1465 raidPtr->raidid); 1466 printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column); 1467 1468 rf_unlock_mutex2(raidPtr->mutex); 1469 return (EINVAL); 1470 } 1471 if (raidPtr->Disks[column].status == rf_ds_spared) { 1472 rf_unlock_mutex2(raidPtr->mutex); 1473 return (EINVAL); 1474 } 1475 rf_unlock_mutex2(raidPtr->mutex); 1476 1477 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1478 if (rrcopy == NULL) 1479 return(ENOMEM); 1480 1481 rrcopy->raidPtr = (void *) raidPtr; 1482 rrcopy->col = column; 1483 1484 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1485 rf_ReconstructInPlaceThread, 1486 rrcopy,"raid_reconip"); 1487 return(retcode); 1488 1489 case RAIDFRAME_GET_INFO: 1490 if (!raidPtr->valid) 1491 return (ENODEV); 1492 ucfgp = (RF_DeviceConfig_t **) data; 1493 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t), 1494 (RF_DeviceConfig_t *)); 1495 if (d_cfg == NULL) 1496 return (ENOMEM); 1497 d_cfg->rows = 1; /* there is only 1 row now */ 1498 d_cfg->cols = raidPtr->numCol; 1499 d_cfg->ndevs = raidPtr->numCol; 1500 if (d_cfg->ndevs >= RF_MAX_DISKS) { 1501 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1502 return (ENOMEM); 1503 } 1504 d_cfg->nspares = raidPtr->numSpare; 1505 if (d_cfg->nspares >= RF_MAX_DISKS) { 1506 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1507 return (ENOMEM); 1508 } 1509 d_cfg->maxqdepth = raidPtr->maxQueueDepth; 1510 d = 0; 1511 for (j = 0; j < d_cfg->cols; j++) { 1512 d_cfg->devs[d] = raidPtr->Disks[j]; 1513 d++; 1514 } 1515 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) { 1516 d_cfg->spares[i] = raidPtr->Disks[j]; 1517 } 1518 retcode = 
copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t)); 1519 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1520 1521 return (retcode); 1522 1523 case RAIDFRAME_CHECK_PARITY: 1524 *(int *) data = raidPtr->parity_good; 1525 return (0); 1526 1527 case RAIDFRAME_PARITYMAP_STATUS: 1528 if (rf_paritymap_ineligible(raidPtr)) 1529 return EINVAL; 1530 rf_paritymap_status(raidPtr->parity_map, 1531 (struct rf_pmstat *)data); 1532 return 0; 1533 1534 case RAIDFRAME_PARITYMAP_SET_PARAMS: 1535 if (rf_paritymap_ineligible(raidPtr)) 1536 return EINVAL; 1537 if (raidPtr->parity_map == NULL) 1538 return ENOENT; /* ??? */ 1539 if (0 != rf_paritymap_set_params(raidPtr->parity_map, 1540 (struct rf_pmparams *)data, 1)) 1541 return EINVAL; 1542 return 0; 1543 1544 case RAIDFRAME_PARITYMAP_GET_DISABLE: 1545 if (rf_paritymap_ineligible(raidPtr)) 1546 return EINVAL; 1547 *(int *) data = rf_paritymap_get_disable(raidPtr); 1548 return 0; 1549 1550 case RAIDFRAME_PARITYMAP_SET_DISABLE: 1551 if (rf_paritymap_ineligible(raidPtr)) 1552 return EINVAL; 1553 rf_paritymap_set_disable(raidPtr, *(int *)data); 1554 /* XXX should errors be passed up? */ 1555 return 0; 1556 1557 case RAIDFRAME_RESET_ACCTOTALS: 1558 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals)); 1559 return (0); 1560 1561 case RAIDFRAME_GET_ACCTOTALS: 1562 totals = (RF_AccTotals_t *) data; 1563 *totals = raidPtr->acc_totals; 1564 return (0); 1565 1566 case RAIDFRAME_KEEP_ACCTOTALS: 1567 raidPtr->keep_acc_totals = *(int *)data; 1568 return (0); 1569 1570 case RAIDFRAME_GET_SIZE: 1571 *(int *) data = raidPtr->totalSectors; 1572 return (0); 1573 1574 /* fail a disk & optionally start reconstruction */ 1575 case RAIDFRAME_FAIL_DISK: 1576 1577 if (raidPtr->Layout.map->faultsTolerated == 0) { 1578 /* Can't do this on a RAID 0!! 
*/ 1579 return(EINVAL); 1580 } 1581 1582 rr = (struct rf_recon_req *) data; 1583 rr->row = 0; 1584 if (rr->col < 0 || rr->col >= raidPtr->numCol) 1585 return (EINVAL); 1586 1587 1588 rf_lock_mutex2(raidPtr->mutex); 1589 if (raidPtr->status == rf_rs_reconstructing) { 1590 /* you can't fail a disk while we're reconstructing! */ 1591 /* XXX wrong for RAID6 */ 1592 rf_unlock_mutex2(raidPtr->mutex); 1593 return (EINVAL); 1594 } 1595 if ((raidPtr->Disks[rr->col].status == 1596 rf_ds_optimal) && (raidPtr->numFailures > 0)) { 1597 /* some other component has failed. Let's not make 1598 things worse. XXX wrong for RAID6 */ 1599 rf_unlock_mutex2(raidPtr->mutex); 1600 return (EINVAL); 1601 } 1602 if (raidPtr->Disks[rr->col].status == rf_ds_spared) { 1603 /* Can't fail a spared disk! */ 1604 rf_unlock_mutex2(raidPtr->mutex); 1605 return (EINVAL); 1606 } 1607 rf_unlock_mutex2(raidPtr->mutex); 1608 1609 /* make a copy of the recon request so that we don't rely on 1610 * the user's buffer */ 1611 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1612 if (rrcopy == NULL) 1613 return(ENOMEM); 1614 memcpy(rrcopy, rr, sizeof(*rr)); 1615 rrcopy->raidPtr = (void *) raidPtr; 1616 1617 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1618 rf_ReconThread, 1619 rrcopy,"raid_recon"); 1620 return (0); 1621 1622 /* invoke a copyback operation after recon on whatever disk 1623 * needs it, if any */ 1624 case RAIDFRAME_COPYBACK: 1625 1626 if (raidPtr->Layout.map->faultsTolerated == 0) { 1627 /* This makes no sense on a RAID 0!! */ 1628 return(EINVAL); 1629 } 1630 1631 if (raidPtr->copyback_in_progress == 1) { 1632 /* Copyback is already in progress! 
*/ 1633 return(EINVAL); 1634 } 1635 1636 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread, 1637 rf_CopybackThread, 1638 raidPtr,"raid_copyback"); 1639 return (retcode); 1640 1641 /* return the percentage completion of reconstruction */ 1642 case RAIDFRAME_CHECK_RECON_STATUS: 1643 if (raidPtr->Layout.map->faultsTolerated == 0) { 1644 /* This makes no sense on a RAID 0, so tell the 1645 user it's done. */ 1646 *(int *) data = 100; 1647 return(0); 1648 } 1649 if (raidPtr->status != rf_rs_reconstructing) 1650 *(int *) data = 100; 1651 else { 1652 if (raidPtr->reconControl->numRUsTotal > 0) { 1653 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal); 1654 } else { 1655 *(int *) data = 0; 1656 } 1657 } 1658 return (0); 1659 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1660 progressInfoPtr = (RF_ProgressInfo_t **) data; 1661 if (raidPtr->status != rf_rs_reconstructing) { 1662 progressInfo.remaining = 0; 1663 progressInfo.completed = 100; 1664 progressInfo.total = 100; 1665 } else { 1666 progressInfo.total = 1667 raidPtr->reconControl->numRUsTotal; 1668 progressInfo.completed = 1669 raidPtr->reconControl->numRUsComplete; 1670 progressInfo.remaining = progressInfo.total - 1671 progressInfo.completed; 1672 } 1673 retcode = copyout(&progressInfo, *progressInfoPtr, 1674 sizeof(RF_ProgressInfo_t)); 1675 return (retcode); 1676 1677 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1678 if (raidPtr->Layout.map->faultsTolerated == 0) { 1679 /* This makes no sense on a RAID 0, so tell the 1680 user it's done. 
*/ 1681 *(int *) data = 100; 1682 return(0); 1683 } 1684 if (raidPtr->parity_rewrite_in_progress == 1) { 1685 *(int *) data = 100 * 1686 raidPtr->parity_rewrite_stripes_done / 1687 raidPtr->Layout.numStripe; 1688 } else { 1689 *(int *) data = 100; 1690 } 1691 return (0); 1692 1693 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1694 progressInfoPtr = (RF_ProgressInfo_t **) data; 1695 if (raidPtr->parity_rewrite_in_progress == 1) { 1696 progressInfo.total = raidPtr->Layout.numStripe; 1697 progressInfo.completed = 1698 raidPtr->parity_rewrite_stripes_done; 1699 progressInfo.remaining = progressInfo.total - 1700 progressInfo.completed; 1701 } else { 1702 progressInfo.remaining = 0; 1703 progressInfo.completed = 100; 1704 progressInfo.total = 100; 1705 } 1706 retcode = copyout(&progressInfo, *progressInfoPtr, 1707 sizeof(RF_ProgressInfo_t)); 1708 return (retcode); 1709 1710 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1711 if (raidPtr->Layout.map->faultsTolerated == 0) { 1712 /* This makes no sense on a RAID 0 */ 1713 *(int *) data = 100; 1714 return(0); 1715 } 1716 if (raidPtr->copyback_in_progress == 1) { 1717 *(int *) data = 100 * raidPtr->copyback_stripes_done / 1718 raidPtr->Layout.numStripe; 1719 } else { 1720 *(int *) data = 100; 1721 } 1722 return (0); 1723 1724 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1725 progressInfoPtr = (RF_ProgressInfo_t **) data; 1726 if (raidPtr->copyback_in_progress == 1) { 1727 progressInfo.total = raidPtr->Layout.numStripe; 1728 progressInfo.completed = 1729 raidPtr->copyback_stripes_done; 1730 progressInfo.remaining = progressInfo.total - 1731 progressInfo.completed; 1732 } else { 1733 progressInfo.remaining = 0; 1734 progressInfo.completed = 100; 1735 progressInfo.total = 100; 1736 } 1737 retcode = copyout(&progressInfo, *progressInfoPtr, 1738 sizeof(RF_ProgressInfo_t)); 1739 return (retcode); 1740 1741 /* the sparetable daemon calls this to wait for the kernel to 1742 * need a spare table. 
this ioctl does not return until a 1743 * spare table is needed. XXX -- calling mpsleep here in the 1744 * ioctl code is almost certainly wrong and evil. -- XXX XXX 1745 * -- I should either compute the spare table in the kernel, 1746 * or have a different -- XXX XXX -- interface (a different 1747 * character device) for delivering the table -- XXX */ 1748 #if 0 1749 case RAIDFRAME_SPARET_WAIT: 1750 rf_lock_mutex2(rf_sparet_wait_mutex); 1751 while (!rf_sparet_wait_queue) 1752 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex); 1753 waitreq = rf_sparet_wait_queue; 1754 rf_sparet_wait_queue = rf_sparet_wait_queue->next; 1755 rf_unlock_mutex2(rf_sparet_wait_mutex); 1756 1757 /* structure assignment */ 1758 *((RF_SparetWait_t *) data) = *waitreq; 1759 1760 RF_Free(waitreq, sizeof(*waitreq)); 1761 return (0); 1762 1763 /* wakes up a process waiting on SPARET_WAIT and puts an error 1764 * code in it that will cause the dameon to exit */ 1765 case RAIDFRAME_ABORT_SPARET_WAIT: 1766 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1767 waitreq->fcol = -1; 1768 rf_lock_mutex2(rf_sparet_wait_mutex); 1769 waitreq->next = rf_sparet_wait_queue; 1770 rf_sparet_wait_queue = waitreq; 1771 rf_broadcast_conf2(rf_sparet_wait_cv); 1772 rf_unlock_mutex2(rf_sparet_wait_mutex); 1773 return (0); 1774 1775 /* used by the spare table daemon to deliver a spare table 1776 * into the kernel */ 1777 case RAIDFRAME_SEND_SPARET: 1778 1779 /* install the spare table */ 1780 retcode = rf_SetSpareTable(raidPtr, *(void **) data); 1781 1782 /* respond to the requestor. 
the return status of the spare 1783 * table installation is passed in the "fcol" field */ 1784 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1785 waitreq->fcol = retcode; 1786 rf_lock_mutex2(rf_sparet_wait_mutex); 1787 waitreq->next = rf_sparet_resp_queue; 1788 rf_sparet_resp_queue = waitreq; 1789 rf_broadcast_cond2(rf_sparet_resp_cv); 1790 rf_unlock_mutex2(rf_sparet_wait_mutex); 1791 1792 return (retcode); 1793 #endif 1794 1795 default: 1796 break; /* fall through to the os-specific code below */ 1797 1798 } 1799 1800 if (!raidPtr->valid) 1801 return (EINVAL); 1802 1803 /* 1804 * Add support for "regular" device ioctls here. 1805 */ 1806 1807 error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l); 1808 if (error != EPASSTHROUGH) 1809 return (error); 1810 1811 switch (cmd) { 1812 case DIOCGDINFO: 1813 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label); 1814 break; 1815 #ifdef __HAVE_OLD_DISKLABEL 1816 case ODIOCGDINFO: 1817 newlabel = *(rs->sc_dkdev.dk_label); 1818 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1819 return ENOTTY; 1820 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1821 break; 1822 #endif 1823 1824 case DIOCGPART: 1825 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label; 1826 ((struct partinfo *) data)->part = 1827 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1828 break; 1829 1830 case DIOCWDINFO: 1831 case DIOCSDINFO: 1832 #ifdef __HAVE_OLD_DISKLABEL 1833 case ODIOCWDINFO: 1834 case ODIOCSDINFO: 1835 #endif 1836 { 1837 struct disklabel *lp; 1838 #ifdef __HAVE_OLD_DISKLABEL 1839 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1840 memset(&newlabel, 0, sizeof newlabel); 1841 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1842 lp = &newlabel; 1843 } else 1844 #endif 1845 lp = (struct disklabel *)data; 1846 1847 if ((error = raidlock(rs)) != 0) 1848 return (error); 1849 1850 rs->sc_flags |= RAIDF_LABELLING; 1851 1852 error = setdisklabel(rs->sc_dkdev.dk_label, 1853 lp, 0, rs->sc_dkdev.dk_cpulabel); 1854 
if (error == 0) { 1855 if (cmd == DIOCWDINFO 1856 #ifdef __HAVE_OLD_DISKLABEL 1857 || cmd == ODIOCWDINFO 1858 #endif 1859 ) 1860 error = writedisklabel(RAIDLABELDEV(dev), 1861 raidstrategy, rs->sc_dkdev.dk_label, 1862 rs->sc_dkdev.dk_cpulabel); 1863 } 1864 rs->sc_flags &= ~RAIDF_LABELLING; 1865 1866 raidunlock(rs); 1867 1868 if (error) 1869 return (error); 1870 break; 1871 } 1872 1873 case DIOCWLABEL: 1874 if (*(int *) data != 0) 1875 rs->sc_flags |= RAIDF_WLABEL; 1876 else 1877 rs->sc_flags &= ~RAIDF_WLABEL; 1878 break; 1879 1880 case DIOCGDEFLABEL: 1881 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data); 1882 break; 1883 1884 #ifdef __HAVE_OLD_DISKLABEL 1885 case ODIOCGDEFLABEL: 1886 raidgetdefaultlabel(raidPtr, rs, &newlabel); 1887 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1888 return ENOTTY; 1889 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1890 break; 1891 #endif 1892 1893 case DIOCAWEDGE: 1894 case DIOCDWEDGE: 1895 dkw = (void *)data; 1896 1897 /* If the ioctl happens here, the parent is us. */ 1898 (void)strcpy(dkw->dkw_parent, rs->sc_xname); 1899 return cmd == DIOCAWEDGE ? 
dkwedge_add(dkw) : dkwedge_del(dkw); 1900 1901 case DIOCLWEDGES: 1902 return dkwedge_list(&rs->sc_dkdev, 1903 (struct dkwedge_list *)data, l); 1904 case DIOCCACHESYNC: 1905 return rf_sync_component_caches(raidPtr); 1906 1907 case DIOCGSTRATEGY: 1908 { 1909 struct disk_strategy *dks = (void *)data; 1910 1911 s = splbio(); 1912 strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue), 1913 sizeof(dks->dks_name)); 1914 splx(s); 1915 dks->dks_paramlen = 0; 1916 1917 return 0; 1918 } 1919 1920 case DIOCSSTRATEGY: 1921 { 1922 struct disk_strategy *dks = (void *)data; 1923 struct bufq_state *new; 1924 struct bufq_state *old; 1925 1926 if (dks->dks_param != NULL) { 1927 return EINVAL; 1928 } 1929 dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */ 1930 error = bufq_alloc(&new, dks->dks_name, 1931 BUFQ_EXACT|BUFQ_SORT_RAWBLOCK); 1932 if (error) { 1933 return error; 1934 } 1935 s = splbio(); 1936 old = rs->buf_queue; 1937 bufq_move(new, old); 1938 rs->buf_queue = new; 1939 splx(s); 1940 bufq_free(old); 1941 1942 return 0; 1943 } 1944 1945 default: 1946 retcode = ENOTTY; 1947 } 1948 return (retcode); 1949 1950 } 1951 1952 1953 /* raidinit -- complete the rest of the initialization for the 1954 RAIDframe device. */ 1955 1956 1957 static void 1958 raidinit(struct raid_softc *rs) 1959 { 1960 cfdata_t cf; 1961 int unit; 1962 RF_Raid_t *raidPtr = &rs->sc_r; 1963 1964 unit = raidPtr->raidid; 1965 1966 1967 /* XXX should check return code first... */ 1968 rs->sc_flags |= RAIDF_INITED; 1969 1970 /* XXX doesn't check bounds. 
*/ 1971 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit); 1972 1973 /* attach the pseudo device */ 1974 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK); 1975 cf->cf_name = raid_cd.cd_name; 1976 cf->cf_atname = raid_cd.cd_name; 1977 cf->cf_unit = unit; 1978 cf->cf_fstate = FSTATE_STAR; 1979 1980 rs->sc_dev = config_attach_pseudo(cf); 1981 1982 if (rs->sc_dev == NULL) { 1983 printf("raid%d: config_attach_pseudo failed\n", 1984 raidPtr->raidid); 1985 rs->sc_flags &= ~RAIDF_INITED; 1986 free(cf, M_RAIDFRAME); 1987 return; 1988 } 1989 1990 /* disk_attach actually creates space for the CPU disklabel, among 1991 * other things, so it's critical to call this *BEFORE* we try putzing 1992 * with disklabels. */ 1993 1994 disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver); 1995 disk_attach(&rs->sc_dkdev); 1996 disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector); 1997 1998 /* XXX There may be a weird interaction here between this, and 1999 * protectedSectors, as used in RAIDframe. */ 2000 2001 rs->sc_size = raidPtr->totalSectors; 2002 2003 dkwedge_discover(&rs->sc_dkdev); 2004 2005 rf_set_geometry(rs, raidPtr); 2006 2007 } 2008 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 2009 /* wake up the daemon & tell it to get us a spare table 2010 * XXX 2011 * the entries in the queues should be tagged with the raidPtr 2012 * so that in the extremely rare case that two recons happen at once, 2013 * we know for which device were requesting a spare table 2014 * XXX 2015 * 2016 * XXX This code is not currently used. 
GO 2017 */ 2018 int 2019 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req) 2020 { 2021 int retcode; 2022 2023 rf_lock_mutex2(rf_sparet_wait_mutex); 2024 req->next = rf_sparet_wait_queue; 2025 rf_sparet_wait_queue = req; 2026 rf_broadcast_cond2(rf_sparet_wait_cv); 2027 2028 /* mpsleep unlocks the mutex */ 2029 while (!rf_sparet_resp_queue) { 2030 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex); 2031 } 2032 req = rf_sparet_resp_queue; 2033 rf_sparet_resp_queue = req->next; 2034 rf_unlock_mutex2(rf_sparet_wait_mutex); 2035 2036 retcode = req->fcol; 2037 RF_Free(req, sizeof(*req)); /* this is not the same req as we 2038 * alloc'd */ 2039 return (retcode); 2040 } 2041 #endif 2042 2043 /* a wrapper around rf_DoAccess that extracts appropriate info from the 2044 * bp & passes it down. 2045 * any calls originating in the kernel must use non-blocking I/O 2046 * do some extra sanity checking to return "appropriate" error values for 2047 * certain conditions (to make some standard utilities work) 2048 * 2049 * Formerly known as: rf_DoAccessKernel 2050 */ 2051 void 2052 raidstart(RF_Raid_t *raidPtr) 2053 { 2054 RF_SectorCount_t num_blocks, pb, sum; 2055 RF_RaidAddr_t raid_addr; 2056 struct partition *pp; 2057 daddr_t blocknum; 2058 struct raid_softc *rs; 2059 int do_async; 2060 struct buf *bp; 2061 int rc; 2062 2063 rs = raidPtr->softc; 2064 /* quick check to see if anything has died recently */ 2065 rf_lock_mutex2(raidPtr->mutex); 2066 if (raidPtr->numNewFailures > 0) { 2067 rf_unlock_mutex2(raidPtr->mutex); 2068 rf_update_component_labels(raidPtr, 2069 RF_NORMAL_COMPONENT_UPDATE); 2070 rf_lock_mutex2(raidPtr->mutex); 2071 raidPtr->numNewFailures--; 2072 } 2073 2074 /* Check to see if we're at the limit... 
*/ 2075 while (raidPtr->openings > 0) { 2076 rf_unlock_mutex2(raidPtr->mutex); 2077 2078 /* get the next item, if any, from the queue */ 2079 if ((bp = bufq_get(rs->buf_queue)) == NULL) { 2080 /* nothing more to do */ 2081 return; 2082 } 2083 2084 /* Ok, for the bp we have here, bp->b_blkno is relative to the 2085 * partition.. Need to make it absolute to the underlying 2086 * device.. */ 2087 2088 blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector; 2089 if (DISKPART(bp->b_dev) != RAW_PART) { 2090 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; 2091 blocknum += pp->p_offset; 2092 } 2093 2094 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, 2095 (int) blocknum)); 2096 2097 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount)); 2098 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid)); 2099 2100 /* *THIS* is where we adjust what block we're going to... 2101 * but DO NOT TOUCH bp->b_blkno!!! */ 2102 raid_addr = blocknum; 2103 2104 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector; 2105 pb = (bp->b_bcount & raidPtr->sectorMask) ? 
1 : 0; 2106 sum = raid_addr + num_blocks + pb; 2107 if (1 || rf_debugKernelAccess) { 2108 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n", 2109 (int) raid_addr, (int) sum, (int) num_blocks, 2110 (int) pb, (int) bp->b_resid)); 2111 } 2112 if ((sum > raidPtr->totalSectors) || (sum < raid_addr) 2113 || (sum < num_blocks) || (sum < pb)) { 2114 bp->b_error = ENOSPC; 2115 bp->b_resid = bp->b_bcount; 2116 biodone(bp); 2117 rf_lock_mutex2(raidPtr->mutex); 2118 continue; 2119 } 2120 /* 2121 * XXX rf_DoAccess() should do this, not just DoAccessKernel() 2122 */ 2123 2124 if (bp->b_bcount & raidPtr->sectorMask) { 2125 bp->b_error = EINVAL; 2126 bp->b_resid = bp->b_bcount; 2127 biodone(bp); 2128 rf_lock_mutex2(raidPtr->mutex); 2129 continue; 2130 2131 } 2132 db1_printf(("Calling DoAccess..\n")); 2133 2134 2135 rf_lock_mutex2(raidPtr->mutex); 2136 raidPtr->openings--; 2137 rf_unlock_mutex2(raidPtr->mutex); 2138 2139 /* 2140 * Everything is async. 2141 */ 2142 do_async = 1; 2143 2144 disk_busy(&rs->sc_dkdev); 2145 2146 /* XXX we're still at splbio() here... do we *really* 2147 need to be? */ 2148 2149 /* don't ever condition on bp->b_flags & B_WRITE. 2150 * always condition on B_READ instead */ 2151 2152 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ? 2153 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, 2154 do_async, raid_addr, num_blocks, 2155 bp->b_data, bp, RF_DAG_NONBLOCKING_IO); 2156 2157 if (rc) { 2158 bp->b_error = rc; 2159 bp->b_resid = bp->b_bcount; 2160 biodone(bp); 2161 /* continue loop */ 2162 } 2163 2164 rf_lock_mutex2(raidPtr->mutex); 2165 } 2166 rf_unlock_mutex2(raidPtr->mutex); 2167 } 2168 2169 2170 2171 2172 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */ 2173 2174 int 2175 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req) 2176 { 2177 int op = (req->type == RF_IO_TYPE_READ) ? 
B_READ : B_WRITE; 2178 struct buf *bp; 2179 2180 req->queue = queue; 2181 bp = req->bp; 2182 2183 switch (req->type) { 2184 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */ 2185 /* XXX need to do something extra here.. */ 2186 /* I'm leaving this in, as I've never actually seen it used, 2187 * and I'd like folks to report it... GO */ 2188 printf(("WAKEUP CALLED\n")); 2189 queue->numOutstanding++; 2190 2191 bp->b_flags = 0; 2192 bp->b_private = req; 2193 2194 KernelWakeupFunc(bp); 2195 break; 2196 2197 case RF_IO_TYPE_READ: 2198 case RF_IO_TYPE_WRITE: 2199 #if RF_ACC_TRACE > 0 2200 if (req->tracerec) { 2201 RF_ETIMER_START(req->tracerec->timer); 2202 } 2203 #endif 2204 InitBP(bp, queue->rf_cinfo->ci_vp, 2205 op, queue->rf_cinfo->ci_dev, 2206 req->sectorOffset, req->numSector, 2207 req->buf, KernelWakeupFunc, (void *) req, 2208 queue->raidPtr->logBytesPerSector, req->b_proc); 2209 2210 if (rf_debugKernelAccess) { 2211 db1_printf(("dispatch: bp->b_blkno = %ld\n", 2212 (long) bp->b_blkno)); 2213 } 2214 queue->numOutstanding++; 2215 queue->last_deq_sector = req->sectorOffset; 2216 /* acc wouldn't have been let in if there were any pending 2217 * reqs at any other priority */ 2218 queue->curPriority = req->priority; 2219 2220 db1_printf(("Going for %c to unit %d col %d\n", 2221 req->type, queue->raidPtr->raidid, 2222 queue->col)); 2223 db1_printf(("sector %d count %d (%d bytes) %d\n", 2224 (int) req->sectorOffset, (int) req->numSector, 2225 (int) (req->numSector << 2226 queue->raidPtr->logBytesPerSector), 2227 (int) queue->raidPtr->logBytesPerSector)); 2228 2229 /* 2230 * XXX: drop lock here since this can block at 2231 * least with backing SCSI devices. Retake it 2232 * to minimize fuss with calling interfaces. 
2233 */ 2234 2235 RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam"); 2236 bdev_strategy(bp); 2237 RF_LOCK_QUEUE_MUTEX(queue, "unusedparam"); 2238 break; 2239 2240 default: 2241 panic("bad req->type in rf_DispatchKernelIO"); 2242 } 2243 db1_printf(("Exiting from DispatchKernelIO\n")); 2244 2245 return (0); 2246 } 2247 /* this is the callback function associated with a I/O invoked from 2248 kernel code. 2249 */ 2250 static void 2251 KernelWakeupFunc(struct buf *bp) 2252 { 2253 RF_DiskQueueData_t *req = NULL; 2254 RF_DiskQueue_t *queue; 2255 2256 db1_printf(("recovering the request queue:\n")); 2257 2258 req = bp->b_private; 2259 2260 queue = (RF_DiskQueue_t *) req->queue; 2261 2262 rf_lock_mutex2(queue->raidPtr->iodone_lock); 2263 2264 #if RF_ACC_TRACE > 0 2265 if (req->tracerec) { 2266 RF_ETIMER_STOP(req->tracerec->timer); 2267 RF_ETIMER_EVAL(req->tracerec->timer); 2268 rf_lock_mutex2(rf_tracing_mutex); 2269 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer); 2270 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer); 2271 req->tracerec->num_phys_ios++; 2272 rf_unlock_mutex2(rf_tracing_mutex); 2273 } 2274 #endif 2275 2276 /* XXX Ok, let's get aggressive... If b_error is set, let's go 2277 * ballistic, and mark the component as hosed... */ 2278 2279 if (bp->b_error != 0) { 2280 /* Mark the disk as dead */ 2281 /* but only mark it once... */ 2282 /* and only if it wouldn't leave this RAID set 2283 completely broken */ 2284 if (((queue->raidPtr->Disks[queue->col].status == 2285 rf_ds_optimal) || 2286 (queue->raidPtr->Disks[queue->col].status == 2287 rf_ds_used_spare)) && 2288 (queue->raidPtr->numFailures < 2289 queue->raidPtr->Layout.map->faultsTolerated)) { 2290 printf("raid%d: IO Error. 
Marking %s as failed.\n", 2291 queue->raidPtr->raidid, 2292 queue->raidPtr->Disks[queue->col].devname); 2293 queue->raidPtr->Disks[queue->col].status = 2294 rf_ds_failed; 2295 queue->raidPtr->status = rf_rs_degraded; 2296 queue->raidPtr->numFailures++; 2297 queue->raidPtr->numNewFailures++; 2298 } else { /* Disk is already dead... */ 2299 /* printf("Disk already marked as dead!\n"); */ 2300 } 2301 2302 } 2303 2304 /* Fill in the error value */ 2305 req->error = bp->b_error; 2306 2307 /* Drop this one on the "finished" queue... */ 2308 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries); 2309 2310 /* Let the raidio thread know there is work to be done. */ 2311 rf_signal_cond2(queue->raidPtr->iodone_cv); 2312 2313 rf_unlock_mutex2(queue->raidPtr->iodone_lock); 2314 } 2315 2316 2317 /* 2318 * initialize a buf structure for doing an I/O in the kernel. 2319 */ 2320 static void 2321 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev, 2322 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf, 2323 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector, 2324 struct proc *b_proc) 2325 { 2326 /* bp->b_flags = B_PHYS | rw_flag; */ 2327 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */ 2328 bp->b_oflags = 0; 2329 bp->b_cflags = 0; 2330 bp->b_bcount = numSect << logBytesPerSector; 2331 bp->b_bufsize = bp->b_bcount; 2332 bp->b_error = 0; 2333 bp->b_dev = dev; 2334 bp->b_data = bf; 2335 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT; 2336 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */ 2337 if (bp->b_bcount == 0) { 2338 panic("bp->b_bcount is zero in InitBP!!"); 2339 } 2340 bp->b_proc = b_proc; 2341 bp->b_iodone = cbFunc; 2342 bp->b_private = cbArg; 2343 } 2344 2345 static void 2346 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs, 2347 struct disklabel *lp) 2348 { 2349 memset(lp, 0, sizeof(*lp)); 2350 2351 /* fabricate a label... 
*/ 2352 lp->d_secperunit = raidPtr->totalSectors; 2353 lp->d_secsize = raidPtr->bytesPerSector; 2354 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe; 2355 lp->d_ntracks = 4 * raidPtr->numCol; 2356 lp->d_ncylinders = raidPtr->totalSectors / 2357 (lp->d_nsectors * lp->d_ntracks); 2358 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 2359 2360 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename)); 2361 lp->d_type = DTYPE_RAID; 2362 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 2363 lp->d_rpm = 3600; 2364 lp->d_interleave = 1; 2365 lp->d_flags = 0; 2366 2367 lp->d_partitions[RAW_PART].p_offset = 0; 2368 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors; 2369 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 2370 lp->d_npartitions = RAW_PART + 1; 2371 2372 lp->d_magic = DISKMAGIC; 2373 lp->d_magic2 = DISKMAGIC; 2374 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label); 2375 2376 } 2377 /* 2378 * Read the disklabel from the raid device. If one is not present, fake one 2379 * up. 2380 */ 2381 static void 2382 raidgetdisklabel(dev_t dev) 2383 { 2384 int unit = raidunit(dev); 2385 struct raid_softc *rs; 2386 const char *errstring; 2387 struct disklabel *lp; 2388 struct cpu_disklabel *clp; 2389 RF_Raid_t *raidPtr; 2390 2391 if ((rs = raidget(unit)) == NULL) 2392 return; 2393 2394 lp = rs->sc_dkdev.dk_label; 2395 clp = rs->sc_dkdev.dk_cpulabel; 2396 2397 db1_printf(("Getting the disklabel...\n")); 2398 2399 memset(clp, 0, sizeof(*clp)); 2400 2401 raidPtr = &rs->sc_r; 2402 2403 raidgetdefaultlabel(raidPtr, rs, lp); 2404 2405 /* 2406 * Call the generic disklabel extraction routine. 2407 */ 2408 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy, 2409 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel); 2410 if (errstring) 2411 raidmakedisklabel(rs); 2412 else { 2413 int i; 2414 struct partition *pp; 2415 2416 /* 2417 * Sanity check whether the found disklabel is valid. 
2418 * 2419 * This is necessary since total size of the raid device 2420 * may vary when an interleave is changed even though exactly 2421 * same components are used, and old disklabel may used 2422 * if that is found. 2423 */ 2424 if (lp->d_secperunit != rs->sc_size) 2425 printf("raid%d: WARNING: %s: " 2426 "total sector size in disklabel (%" PRIu32 ") != " 2427 "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname, 2428 lp->d_secperunit, rs->sc_size); 2429 for (i = 0; i < lp->d_npartitions; i++) { 2430 pp = &lp->d_partitions[i]; 2431 if (pp->p_offset + pp->p_size > rs->sc_size) 2432 printf("raid%d: WARNING: %s: end of partition `%c' " 2433 "exceeds the size of raid (%" PRIu64 ")\n", 2434 unit, rs->sc_xname, 'a' + i, rs->sc_size); 2435 } 2436 } 2437 2438 } 2439 /* 2440 * Take care of things one might want to take care of in the event 2441 * that a disklabel isn't present. 2442 */ 2443 static void 2444 raidmakedisklabel(struct raid_softc *rs) 2445 { 2446 struct disklabel *lp = rs->sc_dkdev.dk_label; 2447 db1_printf(("Making a label..\n")); 2448 2449 /* 2450 * For historical reasons, if there's no disklabel present 2451 * the raw partition must be marked FS_BSDFFS. 2452 */ 2453 2454 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 2455 2456 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 2457 2458 lp->d_checksum = dkcksum(lp); 2459 } 2460 /* 2461 * Wait interruptibly for an exclusive lock. 2462 * 2463 * XXX 2464 * Several drivers do this; it should be abstracted and made MP-safe. 2465 * (Hmm... where have we seen this warning before :-> GO ) 2466 */ 2467 static int 2468 raidlock(struct raid_softc *rs) 2469 { 2470 int error; 2471 2472 while ((rs->sc_flags & RAIDF_LOCKED) != 0) { 2473 rs->sc_flags |= RAIDF_WANTED; 2474 if ((error = 2475 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0) 2476 return (error); 2477 } 2478 rs->sc_flags |= RAIDF_LOCKED; 2479 return (0); 2480 } 2481 /* 2482 * Unlock and wake up any waiters. 
2483 */ 2484 static void 2485 raidunlock(struct raid_softc *rs) 2486 { 2487 2488 rs->sc_flags &= ~RAIDF_LOCKED; 2489 if ((rs->sc_flags & RAIDF_WANTED) != 0) { 2490 rs->sc_flags &= ~RAIDF_WANTED; 2491 wakeup(rs); 2492 } 2493 } 2494 2495 2496 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ 2497 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ 2498 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE 2499 2500 static daddr_t 2501 rf_component_info_offset(void) 2502 { 2503 2504 return RF_COMPONENT_INFO_OFFSET; 2505 } 2506 2507 static daddr_t 2508 rf_component_info_size(unsigned secsize) 2509 { 2510 daddr_t info_size; 2511 2512 KASSERT(secsize); 2513 if (secsize > RF_COMPONENT_INFO_SIZE) 2514 info_size = secsize; 2515 else 2516 info_size = RF_COMPONENT_INFO_SIZE; 2517 2518 return info_size; 2519 } 2520 2521 static daddr_t 2522 rf_parity_map_offset(RF_Raid_t *raidPtr) 2523 { 2524 daddr_t map_offset; 2525 2526 KASSERT(raidPtr->bytesPerSector); 2527 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE) 2528 map_offset = raidPtr->bytesPerSector; 2529 else 2530 map_offset = RF_COMPONENT_INFO_SIZE; 2531 map_offset += rf_component_info_offset(); 2532 2533 return map_offset; 2534 } 2535 2536 static daddr_t 2537 rf_parity_map_size(RF_Raid_t *raidPtr) 2538 { 2539 daddr_t map_size; 2540 2541 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE) 2542 map_size = raidPtr->bytesPerSector; 2543 else 2544 map_size = RF_PARITY_MAP_SIZE; 2545 2546 return map_size; 2547 } 2548 2549 int 2550 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col) 2551 { 2552 RF_ComponentLabel_t *clabel; 2553 2554 clabel = raidget_component_label(raidPtr, col); 2555 clabel->clean = RF_RAID_CLEAN; 2556 raidflush_component_label(raidPtr, col); 2557 return(0); 2558 } 2559 2560 2561 int 2562 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col) 2563 { 2564 RF_ComponentLabel_t *clabel; 2565 2566 clabel = raidget_component_label(raidPtr, col); 2567 clabel->clean = RF_RAID_DIRTY; 2568 raidflush_component_label(raidPtr, col); 2569 
return(0); 2570 } 2571 2572 int 2573 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2574 { 2575 KASSERT(raidPtr->bytesPerSector); 2576 return raidread_component_label(raidPtr->bytesPerSector, 2577 raidPtr->Disks[col].dev, 2578 raidPtr->raid_cinfo[col].ci_vp, 2579 &raidPtr->raid_cinfo[col].ci_label); 2580 } 2581 2582 RF_ComponentLabel_t * 2583 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2584 { 2585 return &raidPtr->raid_cinfo[col].ci_label; 2586 } 2587 2588 int 2589 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2590 { 2591 RF_ComponentLabel_t *label; 2592 2593 label = &raidPtr->raid_cinfo[col].ci_label; 2594 label->mod_counter = raidPtr->mod_counter; 2595 #ifndef RF_NO_PARITY_MAP 2596 label->parity_map_modcount = label->mod_counter; 2597 #endif 2598 return raidwrite_component_label(raidPtr->bytesPerSector, 2599 raidPtr->Disks[col].dev, 2600 raidPtr->raid_cinfo[col].ci_vp, label); 2601 } 2602 2603 2604 static int 2605 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp, 2606 RF_ComponentLabel_t *clabel) 2607 { 2608 return raidread_component_area(dev, b_vp, clabel, 2609 sizeof(RF_ComponentLabel_t), 2610 rf_component_info_offset(), 2611 rf_component_info_size(secsize)); 2612 } 2613 2614 /* ARGSUSED */ 2615 static int 2616 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data, 2617 size_t msize, daddr_t offset, daddr_t dsize) 2618 { 2619 struct buf *bp; 2620 const struct bdevsw *bdev; 2621 int error; 2622 2623 /* XXX should probably ensure that we don't try to do this if 2624 someone has changed rf_protected_sectors. */ 2625 2626 if (b_vp == NULL) { 2627 /* For whatever reason, this component is not valid. 2628 Don't try to read a component label from it. */ 2629 return(EINVAL); 2630 } 2631 2632 /* get a block of the appropriate size... 
*/ 2633 bp = geteblk((int)dsize); 2634 bp->b_dev = dev; 2635 2636 /* get our ducks in a row for the read */ 2637 bp->b_blkno = offset / DEV_BSIZE; 2638 bp->b_bcount = dsize; 2639 bp->b_flags |= B_READ; 2640 bp->b_resid = dsize; 2641 2642 bdev = bdevsw_lookup(bp->b_dev); 2643 if (bdev == NULL) 2644 return (ENXIO); 2645 (*bdev->d_strategy)(bp); 2646 2647 error = biowait(bp); 2648 2649 if (!error) { 2650 memcpy(data, bp->b_data, msize); 2651 } 2652 2653 brelse(bp, 0); 2654 return(error); 2655 } 2656 2657 2658 static int 2659 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp, 2660 RF_ComponentLabel_t *clabel) 2661 { 2662 return raidwrite_component_area(dev, b_vp, clabel, 2663 sizeof(RF_ComponentLabel_t), 2664 rf_component_info_offset(), 2665 rf_component_info_size(secsize), 0); 2666 } 2667 2668 /* ARGSUSED */ 2669 static int 2670 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data, 2671 size_t msize, daddr_t offset, daddr_t dsize, int asyncp) 2672 { 2673 struct buf *bp; 2674 const struct bdevsw *bdev; 2675 int error; 2676 2677 /* get a block of the appropriate size... */ 2678 bp = geteblk((int)dsize); 2679 bp->b_dev = dev; 2680 2681 /* get our ducks in a row for the write */ 2682 bp->b_blkno = offset / DEV_BSIZE; 2683 bp->b_bcount = dsize; 2684 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0); 2685 bp->b_resid = dsize; 2686 2687 memset(bp->b_data, 0, dsize); 2688 memcpy(bp->b_data, data, msize); 2689 2690 bdev = bdevsw_lookup(bp->b_dev); 2691 if (bdev == NULL) 2692 return (ENXIO); 2693 (*bdev->d_strategy)(bp); 2694 if (asyncp) 2695 return 0; 2696 error = biowait(bp); 2697 brelse(bp, 0); 2698 if (error) { 2699 #if 1 2700 printf("Failed to write RAID component info!\n"); 2701 #endif 2702 } 2703 2704 return(error); 2705 } 2706 2707 void 2708 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map) 2709 { 2710 int c; 2711 2712 for (c = 0; c < raidPtr->numCol; c++) { 2713 /* Skip dead disks. 
*/ 2714 if (RF_DEAD_DISK(raidPtr->Disks[c].status)) 2715 continue; 2716 /* XXXjld: what if an error occurs here? */ 2717 raidwrite_component_area(raidPtr->Disks[c].dev, 2718 raidPtr->raid_cinfo[c].ci_vp, map, 2719 RF_PARITYMAP_NBYTE, 2720 rf_parity_map_offset(raidPtr), 2721 rf_parity_map_size(raidPtr), 0); 2722 } 2723 } 2724 2725 void 2726 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map) 2727 { 2728 struct rf_paritymap_ondisk tmp; 2729 int c,first; 2730 2731 first=1; 2732 for (c = 0; c < raidPtr->numCol; c++) { 2733 /* Skip dead disks. */ 2734 if (RF_DEAD_DISK(raidPtr->Disks[c].status)) 2735 continue; 2736 raidread_component_area(raidPtr->Disks[c].dev, 2737 raidPtr->raid_cinfo[c].ci_vp, &tmp, 2738 RF_PARITYMAP_NBYTE, 2739 rf_parity_map_offset(raidPtr), 2740 rf_parity_map_size(raidPtr)); 2741 if (first) { 2742 memcpy(map, &tmp, sizeof(*map)); 2743 first = 0; 2744 } else { 2745 rf_paritymap_merge(map, &tmp); 2746 } 2747 } 2748 } 2749 2750 void 2751 rf_markalldirty(RF_Raid_t *raidPtr) 2752 { 2753 RF_ComponentLabel_t *clabel; 2754 int sparecol; 2755 int c; 2756 int j; 2757 int scol = -1; 2758 2759 raidPtr->mod_counter++; 2760 for (c = 0; c < raidPtr->numCol; c++) { 2761 /* we don't want to touch (at all) a disk that has 2762 failed */ 2763 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) { 2764 clabel = raidget_component_label(raidPtr, c); 2765 if (clabel->status == rf_ds_spared) { 2766 /* XXX do something special... 2767 but whatever you do, don't 2768 try to access it!! */ 2769 } else { 2770 raidmarkdirty(raidPtr, c); 2771 } 2772 } 2773 } 2774 2775 for( c = 0; c < raidPtr->numSpare ; c++) { 2776 sparecol = raidPtr->numCol + c; 2777 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 2778 /* 2779 2780 we claim this disk is "optimal" if it's 2781 rf_ds_used_spare, as that means it should be 2782 directly substitutable for the disk it replaced. 2783 We note that too... 
2784 2785 */ 2786 2787 for(j=0;j<raidPtr->numCol;j++) { 2788 if (raidPtr->Disks[j].spareCol == sparecol) { 2789 scol = j; 2790 break; 2791 } 2792 } 2793 2794 clabel = raidget_component_label(raidPtr, sparecol); 2795 /* make sure status is noted */ 2796 2797 raid_init_component_label(raidPtr, clabel); 2798 2799 clabel->row = 0; 2800 clabel->column = scol; 2801 /* Note: we *don't* change status from rf_ds_used_spare 2802 to rf_ds_optimal */ 2803 /* clabel.status = rf_ds_optimal; */ 2804 2805 raidmarkdirty(raidPtr, sparecol); 2806 } 2807 } 2808 } 2809 2810 2811 void 2812 rf_update_component_labels(RF_Raid_t *raidPtr, int final) 2813 { 2814 RF_ComponentLabel_t *clabel; 2815 int sparecol; 2816 int c; 2817 int j; 2818 int scol; 2819 2820 scol = -1; 2821 2822 /* XXX should do extra checks to make sure things really are clean, 2823 rather than blindly setting the clean bit... */ 2824 2825 raidPtr->mod_counter++; 2826 2827 for (c = 0; c < raidPtr->numCol; c++) { 2828 if (raidPtr->Disks[c].status == rf_ds_optimal) { 2829 clabel = raidget_component_label(raidPtr, c); 2830 /* make sure status is noted */ 2831 clabel->status = rf_ds_optimal; 2832 2833 /* note what unit we are configured as */ 2834 clabel->last_unit = raidPtr->raidid; 2835 2836 raidflush_component_label(raidPtr, c); 2837 if (final == RF_FINAL_COMPONENT_UPDATE) { 2838 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2839 raidmarkclean(raidPtr, c); 2840 } 2841 } 2842 } 2843 /* else we don't touch it.. */ 2844 } 2845 2846 for( c = 0; c < raidPtr->numSpare ; c++) { 2847 sparecol = raidPtr->numCol + c; 2848 /* Need to ensure that the reconstruct actually completed! */ 2849 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 2850 /* 2851 2852 we claim this disk is "optimal" if it's 2853 rf_ds_used_spare, as that means it should be 2854 directly substitutable for the disk it replaced. 2855 We note that too... 
2856 2857 */ 2858 2859 for(j=0;j<raidPtr->numCol;j++) { 2860 if (raidPtr->Disks[j].spareCol == sparecol) { 2861 scol = j; 2862 break; 2863 } 2864 } 2865 2866 /* XXX shouldn't *really* need this... */ 2867 clabel = raidget_component_label(raidPtr, sparecol); 2868 /* make sure status is noted */ 2869 2870 raid_init_component_label(raidPtr, clabel); 2871 2872 clabel->column = scol; 2873 clabel->status = rf_ds_optimal; 2874 clabel->last_unit = raidPtr->raidid; 2875 2876 raidflush_component_label(raidPtr, sparecol); 2877 if (final == RF_FINAL_COMPONENT_UPDATE) { 2878 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2879 raidmarkclean(raidPtr, sparecol); 2880 } 2881 } 2882 } 2883 } 2884 } 2885 2886 void 2887 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured) 2888 { 2889 2890 if (vp != NULL) { 2891 if (auto_configured == 1) { 2892 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2893 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 2894 vput(vp); 2895 2896 } else { 2897 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred); 2898 } 2899 } 2900 } 2901 2902 2903 void 2904 rf_UnconfigureVnodes(RF_Raid_t *raidPtr) 2905 { 2906 int r,c; 2907 struct vnode *vp; 2908 int acd; 2909 2910 2911 /* We take this opportunity to close the vnodes like we should.. 
*/ 2912 2913 for (c = 0; c < raidPtr->numCol; c++) { 2914 vp = raidPtr->raid_cinfo[c].ci_vp; 2915 acd = raidPtr->Disks[c].auto_configured; 2916 rf_close_component(raidPtr, vp, acd); 2917 raidPtr->raid_cinfo[c].ci_vp = NULL; 2918 raidPtr->Disks[c].auto_configured = 0; 2919 } 2920 2921 for (r = 0; r < raidPtr->numSpare; r++) { 2922 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp; 2923 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured; 2924 rf_close_component(raidPtr, vp, acd); 2925 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL; 2926 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0; 2927 } 2928 } 2929 2930 2931 void 2932 rf_ReconThread(struct rf_recon_req *req) 2933 { 2934 int s; 2935 RF_Raid_t *raidPtr; 2936 2937 s = splbio(); 2938 raidPtr = (RF_Raid_t *) req->raidPtr; 2939 raidPtr->recon_in_progress = 1; 2940 2941 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col, 2942 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0)); 2943 2944 RF_Free(req, sizeof(*req)); 2945 2946 raidPtr->recon_in_progress = 0; 2947 splx(s); 2948 2949 /* That's all... */ 2950 kthread_exit(0); /* does not return */ 2951 } 2952 2953 void 2954 rf_RewriteParityThread(RF_Raid_t *raidPtr) 2955 { 2956 int retcode; 2957 int s; 2958 2959 raidPtr->parity_rewrite_stripes_done = 0; 2960 raidPtr->parity_rewrite_in_progress = 1; 2961 s = splbio(); 2962 retcode = rf_RewriteParity(raidPtr); 2963 splx(s); 2964 if (retcode) { 2965 printf("raid%d: Error re-writing parity (%d)!\n", 2966 raidPtr->raidid, retcode); 2967 } else { 2968 /* set the clean bit! If we shutdown correctly, 2969 the clean bit on each component label will get 2970 set */ 2971 raidPtr->parity_good = RF_RAID_CLEAN; 2972 } 2973 raidPtr->parity_rewrite_in_progress = 0; 2974 2975 /* Anyone waiting for us to stop? If so, inform them... */ 2976 if (raidPtr->waitShutdown) { 2977 wakeup(&raidPtr->parity_rewrite_in_progress); 2978 } 2979 2980 /* That's all... 
*/ 2981 kthread_exit(0); /* does not return */ 2982 } 2983 2984 2985 void 2986 rf_CopybackThread(RF_Raid_t *raidPtr) 2987 { 2988 int s; 2989 2990 raidPtr->copyback_in_progress = 1; 2991 s = splbio(); 2992 rf_CopybackReconstructedData(raidPtr); 2993 splx(s); 2994 raidPtr->copyback_in_progress = 0; 2995 2996 /* That's all... */ 2997 kthread_exit(0); /* does not return */ 2998 } 2999 3000 3001 void 3002 rf_ReconstructInPlaceThread(struct rf_recon_req *req) 3003 { 3004 int s; 3005 RF_Raid_t *raidPtr; 3006 3007 s = splbio(); 3008 raidPtr = req->raidPtr; 3009 raidPtr->recon_in_progress = 1; 3010 rf_ReconstructInPlace(raidPtr, req->col); 3011 RF_Free(req, sizeof(*req)); 3012 raidPtr->recon_in_progress = 0; 3013 splx(s); 3014 3015 /* That's all... */ 3016 kthread_exit(0); /* does not return */ 3017 } 3018 3019 static RF_AutoConfig_t * 3020 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp, 3021 const char *cname, RF_SectorCount_t size, uint64_t numsecs, 3022 unsigned secsize) 3023 { 3024 int good_one = 0; 3025 RF_ComponentLabel_t *clabel; 3026 RF_AutoConfig_t *ac; 3027 3028 clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT); 3029 if (clabel == NULL) { 3030 oomem: 3031 while(ac_list) { 3032 ac = ac_list; 3033 if (ac->clabel) 3034 free(ac->clabel, M_RAIDFRAME); 3035 ac_list = ac_list->next; 3036 free(ac, M_RAIDFRAME); 3037 } 3038 printf("RAID auto config: out of memory!\n"); 3039 return NULL; /* XXX probably should panic? */ 3040 } 3041 3042 if (!raidread_component_label(secsize, dev, vp, clabel)) { 3043 /* Got the label. Does it look reasonable? */ 3044 if (rf_reasonable_label(clabel, numsecs) && 3045 (rf_component_label_partitionsize(clabel) <= size)) { 3046 #ifdef DEBUG 3047 printf("Component on: %s: %llu\n", 3048 cname, (unsigned long long)size); 3049 rf_print_component_label(clabel); 3050 #endif 3051 /* if it's reasonable, add it, else ignore it. 
*/ 3052 ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME, 3053 M_NOWAIT); 3054 if (ac == NULL) { 3055 free(clabel, M_RAIDFRAME); 3056 goto oomem; 3057 } 3058 strlcpy(ac->devname, cname, sizeof(ac->devname)); 3059 ac->dev = dev; 3060 ac->vp = vp; 3061 ac->clabel = clabel; 3062 ac->next = ac_list; 3063 ac_list = ac; 3064 good_one = 1; 3065 } 3066 } 3067 if (!good_one) { 3068 /* cleanup */ 3069 free(clabel, M_RAIDFRAME); 3070 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3071 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3072 vput(vp); 3073 } 3074 return ac_list; 3075 } 3076 3077 RF_AutoConfig_t * 3078 rf_find_raid_components(void) 3079 { 3080 struct vnode *vp; 3081 struct disklabel label; 3082 device_t dv; 3083 deviter_t di; 3084 dev_t dev; 3085 int bmajor, bminor, wedge, rf_part_found; 3086 int error; 3087 int i; 3088 RF_AutoConfig_t *ac_list; 3089 uint64_t numsecs; 3090 unsigned secsize; 3091 3092 /* initialize the AutoConfig list */ 3093 ac_list = NULL; 3094 3095 /* we begin by trolling through *all* the devices on the system */ 3096 3097 for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL; 3098 dv = deviter_next(&di)) { 3099 3100 /* we are only interested in disks... */ 3101 if (device_class(dv) != DV_DISK) 3102 continue; 3103 3104 /* we don't care about floppies... */ 3105 if (device_is_a(dv, "fd")) { 3106 continue; 3107 } 3108 3109 /* we don't care about CD's... */ 3110 if (device_is_a(dv, "cd")) { 3111 continue; 3112 } 3113 3114 /* we don't care about md's... 
*/ 3115 if (device_is_a(dv, "md")) { 3116 continue; 3117 } 3118 3119 /* hdfd is the Atari/Hades floppy driver */ 3120 if (device_is_a(dv, "hdfd")) { 3121 continue; 3122 } 3123 3124 /* fdisa is the Atari/Milan floppy driver */ 3125 if (device_is_a(dv, "fdisa")) { 3126 continue; 3127 } 3128 3129 /* need to find the device_name_to_block_device_major stuff */ 3130 bmajor = devsw_name2blk(device_xname(dv), NULL, 0); 3131 3132 rf_part_found = 0; /*No raid partition as yet*/ 3133 3134 /* get a vnode for the raw partition of this disk */ 3135 3136 wedge = device_is_a(dv, "dk"); 3137 bminor = minor(device_unit(dv)); 3138 dev = wedge ? makedev(bmajor, bminor) : 3139 MAKEDISKDEV(bmajor, bminor, RAW_PART); 3140 if (bdevvp(dev, &vp)) 3141 panic("RAID can't alloc vnode"); 3142 3143 error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED); 3144 3145 if (error) { 3146 /* "Who cares." Continue looking 3147 for something that exists*/ 3148 vput(vp); 3149 continue; 3150 } 3151 3152 error = getdisksize(vp, &numsecs, &secsize); 3153 if (error) { 3154 vput(vp); 3155 continue; 3156 } 3157 if (wedge) { 3158 struct dkwedge_info dkw; 3159 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, 3160 NOCRED); 3161 if (error) { 3162 printf("RAIDframe: can't get wedge info for " 3163 "dev %s (%d)\n", device_xname(dv), error); 3164 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3165 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3166 vput(vp); 3167 continue; 3168 } 3169 3170 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) { 3171 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3172 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3173 vput(vp); 3174 continue; 3175 } 3176 3177 ac_list = rf_get_component(ac_list, dev, vp, 3178 device_xname(dv), dkw.dkw_size, numsecs, secsize); 3179 rf_part_found = 1; /*There is a raid component on this disk*/ 3180 continue; 3181 } 3182 3183 /* Ok, the disk exists. Go get the disklabel. 
*/ 3184 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED); 3185 if (error) { 3186 /* 3187 * XXX can't happen - open() would 3188 * have errored out (or faked up one) 3189 */ 3190 if (error != ENOTTY) 3191 printf("RAIDframe: can't get label for dev " 3192 "%s (%d)\n", device_xname(dv), error); 3193 } 3194 3195 /* don't need this any more. We'll allocate it again 3196 a little later if we really do... */ 3197 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3198 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3199 vput(vp); 3200 3201 if (error) 3202 continue; 3203 3204 rf_part_found = 0; /*No raid partitions yet*/ 3205 for (i = 0; i < label.d_npartitions; i++) { 3206 char cname[sizeof(ac_list->devname)]; 3207 3208 /* We only support partitions marked as RAID */ 3209 if (label.d_partitions[i].p_fstype != FS_RAID) 3210 continue; 3211 3212 dev = MAKEDISKDEV(bmajor, device_unit(dv), i); 3213 if (bdevvp(dev, &vp)) 3214 panic("RAID can't alloc vnode"); 3215 3216 error = VOP_OPEN(vp, FREAD, NOCRED); 3217 if (error) { 3218 /* Whatever... */ 3219 vput(vp); 3220 continue; 3221 } 3222 snprintf(cname, sizeof(cname), "%s%c", 3223 device_xname(dv), 'a' + i); 3224 ac_list = rf_get_component(ac_list, dev, vp, cname, 3225 label.d_partitions[i].p_size, numsecs, secsize); 3226 rf_part_found = 1; /*There is at least one raid partition on this disk*/ 3227 } 3228 3229 /* 3230 *If there is no raid component on this disk, either in a 3231 *disklabel or inside a wedge, check the raw partition as well, 3232 *as it is possible to configure raid components on raw disk 3233 *devices. 3234 */ 3235 3236 if (!rf_part_found) { 3237 char cname[sizeof(ac_list->devname)]; 3238 3239 dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART); 3240 if (bdevvp(dev, &vp)) 3241 panic("RAID can't alloc vnode"); 3242 3243 error = VOP_OPEN(vp, FREAD, NOCRED); 3244 if (error) { 3245 /* Whatever... 
*/ 3246 vput(vp); 3247 continue; 3248 } 3249 snprintf(cname, sizeof(cname), "%s%c", 3250 device_xname(dv), 'a' + RAW_PART); 3251 ac_list = rf_get_component(ac_list, dev, vp, cname, 3252 label.d_partitions[RAW_PART].p_size, numsecs, secsize); 3253 } 3254 } 3255 deviter_release(&di); 3256 return ac_list; 3257 } 3258 3259 3260 int 3261 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs) 3262 { 3263 3264 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || 3265 (clabel->version==RF_COMPONENT_LABEL_VERSION)) && 3266 ((clabel->clean == RF_RAID_CLEAN) || 3267 (clabel->clean == RF_RAID_DIRTY)) && 3268 clabel->row >=0 && 3269 clabel->column >= 0 && 3270 clabel->num_rows > 0 && 3271 clabel->num_columns > 0 && 3272 clabel->row < clabel->num_rows && 3273 clabel->column < clabel->num_columns && 3274 clabel->blockSize > 0 && 3275 /* 3276 * numBlocksHi may contain garbage, but it is ok since 3277 * the type is unsigned. If it is really garbage, 3278 * rf_fix_old_label_size() will fix it. 3279 */ 3280 rf_component_label_numblocks(clabel) > 0) { 3281 /* 3282 * label looks reasonable enough... 3283 * let's make sure it has no old garbage. 3284 */ 3285 if (numsecs) 3286 rf_fix_old_label_size(clabel, numsecs); 3287 return(1); 3288 } 3289 return(0); 3290 } 3291 3292 3293 /* 3294 * For reasons yet unknown, some old component labels have garbage in 3295 * the newer numBlocksHi region, and this causes lossage. Since those 3296 * disks will also have numsecs set to less than 32 bits of sectors, 3297 * we can determine when this corruption has occurred, and fix it. 3298 * 3299 * The exact same problem, with the same unknown reason, happens to 3300 * the partitionSizeHi member as well. 
3301 */ 3302 static void 3303 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs) 3304 { 3305 3306 if (numsecs < ((uint64_t)1 << 32)) { 3307 if (clabel->numBlocksHi) { 3308 printf("WARNING: total sectors < 32 bits, yet " 3309 "numBlocksHi set\n" 3310 "WARNING: resetting numBlocksHi to zero.\n"); 3311 clabel->numBlocksHi = 0; 3312 } 3313 3314 if (clabel->partitionSizeHi) { 3315 printf("WARNING: total sectors < 32 bits, yet " 3316 "partitionSizeHi set\n" 3317 "WARNING: resetting partitionSizeHi to zero.\n"); 3318 clabel->partitionSizeHi = 0; 3319 } 3320 } 3321 } 3322 3323 3324 #ifdef DEBUG 3325 void 3326 rf_print_component_label(RF_ComponentLabel_t *clabel) 3327 { 3328 uint64_t numBlocks; 3329 static const char *rp[] = { 3330 "No", "Force", "Soft", "*invalid*" 3331 }; 3332 3333 3334 numBlocks = rf_component_label_numblocks(clabel); 3335 3336 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", 3337 clabel->row, clabel->column, 3338 clabel->num_rows, clabel->num_columns); 3339 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 3340 clabel->version, clabel->serial_number, 3341 clabel->mod_counter); 3342 printf(" Clean: %s Status: %d\n", 3343 clabel->clean ? "Yes" : "No", clabel->status); 3344 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n", 3345 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); 3346 printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n", 3347 (char) clabel->parityConfig, clabel->blockSize, numBlocks); 3348 printf(" Autoconfig: %s\n", clabel->autoconfigure ? 
"Yes" : "No"); 3349 printf(" Root partition: %s\n", rp[clabel->root_partition & 3]); 3350 printf(" Last configured as: raid%d\n", clabel->last_unit); 3351 #if 0 3352 printf(" Config order: %d\n", clabel->config_order); 3353 #endif 3354 3355 } 3356 #endif 3357 3358 RF_ConfigSet_t * 3359 rf_create_auto_sets(RF_AutoConfig_t *ac_list) 3360 { 3361 RF_AutoConfig_t *ac; 3362 RF_ConfigSet_t *config_sets; 3363 RF_ConfigSet_t *cset; 3364 RF_AutoConfig_t *ac_next; 3365 3366 3367 config_sets = NULL; 3368 3369 /* Go through the AutoConfig list, and figure out which components 3370 belong to what sets. */ 3371 ac = ac_list; 3372 while(ac!=NULL) { 3373 /* we're going to putz with ac->next, so save it here 3374 for use at the end of the loop */ 3375 ac_next = ac->next; 3376 3377 if (config_sets == NULL) { 3378 /* will need at least this one... */ 3379 config_sets = (RF_ConfigSet_t *) 3380 malloc(sizeof(RF_ConfigSet_t), 3381 M_RAIDFRAME, M_NOWAIT); 3382 if (config_sets == NULL) { 3383 panic("rf_create_auto_sets: No memory!"); 3384 } 3385 /* this one is easy :) */ 3386 config_sets->ac = ac; 3387 config_sets->next = NULL; 3388 config_sets->rootable = 0; 3389 ac->next = NULL; 3390 } else { 3391 /* which set does this component fit into? */ 3392 cset = config_sets; 3393 while(cset!=NULL) { 3394 if (rf_does_it_fit(cset, ac)) { 3395 /* looks like it matches... */ 3396 ac->next = cset->ac; 3397 cset->ac = ac; 3398 break; 3399 } 3400 cset = cset->next; 3401 } 3402 if (cset==NULL) { 3403 /* didn't find a match above... 
new set..*/ 3404 cset = (RF_ConfigSet_t *) 3405 malloc(sizeof(RF_ConfigSet_t), 3406 M_RAIDFRAME, M_NOWAIT); 3407 if (cset == NULL) { 3408 panic("rf_create_auto_sets: No memory!"); 3409 } 3410 cset->ac = ac; 3411 ac->next = NULL; 3412 cset->next = config_sets; 3413 cset->rootable = 0; 3414 config_sets = cset; 3415 } 3416 } 3417 ac = ac_next; 3418 } 3419 3420 3421 return(config_sets); 3422 } 3423 3424 static int 3425 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac) 3426 { 3427 RF_ComponentLabel_t *clabel1, *clabel2; 3428 3429 /* If this one matches the *first* one in the set, that's good 3430 enough, since the other members of the set would have been 3431 through here too... */ 3432 /* note that we are not checking partitionSize here.. 3433 3434 Note that we are also not checking the mod_counters here. 3435 If everything else matches except the mod_counter, that's 3436 good enough for this test. We will deal with the mod_counters 3437 a little later in the autoconfiguration process. 3438 3439 (clabel1->mod_counter == clabel2->mod_counter) && 3440 3441 The reason we don't check for this is that failed disks 3442 will have lower modification counts. If those disks are 3443 not added to the set they used to belong to, then they will 3444 form their own set, which may result in 2 different sets, 3445 for example, competing to be configured at raid0, and 3446 perhaps competing to be the root filesystem set. If the 3447 wrong ones get configured, or both attempt to become /, 3448 weird behaviour and or serious lossage will occur. Thus we 3449 need to bring them into the fold here, and kick them out at 3450 a later point. 
3451 3452 */ 3453 3454 clabel1 = cset->ac->clabel; 3455 clabel2 = ac->clabel; 3456 if ((clabel1->version == clabel2->version) && 3457 (clabel1->serial_number == clabel2->serial_number) && 3458 (clabel1->num_rows == clabel2->num_rows) && 3459 (clabel1->num_columns == clabel2->num_columns) && 3460 (clabel1->sectPerSU == clabel2->sectPerSU) && 3461 (clabel1->SUsPerPU == clabel2->SUsPerPU) && 3462 (clabel1->SUsPerRU == clabel2->SUsPerRU) && 3463 (clabel1->parityConfig == clabel2->parityConfig) && 3464 (clabel1->maxOutstanding == clabel2->maxOutstanding) && 3465 (clabel1->blockSize == clabel2->blockSize) && 3466 rf_component_label_numblocks(clabel1) == 3467 rf_component_label_numblocks(clabel2) && 3468 (clabel1->autoconfigure == clabel2->autoconfigure) && 3469 (clabel1->root_partition == clabel2->root_partition) && 3470 (clabel1->last_unit == clabel2->last_unit) && 3471 (clabel1->config_order == clabel2->config_order)) { 3472 /* if it get's here, it almost *has* to be a match */ 3473 } else { 3474 /* it's not consistent with somebody in the set.. 3475 punt */ 3476 return(0); 3477 } 3478 /* all was fine.. it must fit... */ 3479 return(1); 3480 } 3481 3482 int 3483 rf_have_enough_components(RF_ConfigSet_t *cset) 3484 { 3485 RF_AutoConfig_t *ac; 3486 RF_AutoConfig_t *auto_config; 3487 RF_ComponentLabel_t *clabel; 3488 int c; 3489 int num_cols; 3490 int num_missing; 3491 int mod_counter; 3492 int mod_counter_found; 3493 int even_pair_failed; 3494 char parity_type; 3495 3496 3497 /* check to see that we have enough 'live' components 3498 of this set. If so, we can configure it if necessary */ 3499 3500 num_cols = cset->ac->clabel->num_columns; 3501 parity_type = cset->ac->clabel->parityConfig; 3502 3503 /* XXX Check for duplicate components!?!?!? */ 3504 3505 /* Determine what the mod_counter is supposed to be for this set. 
*/ 3506 3507 mod_counter_found = 0; 3508 mod_counter = 0; 3509 ac = cset->ac; 3510 while(ac!=NULL) { 3511 if (mod_counter_found==0) { 3512 mod_counter = ac->clabel->mod_counter; 3513 mod_counter_found = 1; 3514 } else { 3515 if (ac->clabel->mod_counter > mod_counter) { 3516 mod_counter = ac->clabel->mod_counter; 3517 } 3518 } 3519 ac = ac->next; 3520 } 3521 3522 num_missing = 0; 3523 auto_config = cset->ac; 3524 3525 even_pair_failed = 0; 3526 for(c=0; c<num_cols; c++) { 3527 ac = auto_config; 3528 while(ac!=NULL) { 3529 if ((ac->clabel->column == c) && 3530 (ac->clabel->mod_counter == mod_counter)) { 3531 /* it's this one... */ 3532 #ifdef DEBUG 3533 printf("Found: %s at %d\n", 3534 ac->devname,c); 3535 #endif 3536 break; 3537 } 3538 ac=ac->next; 3539 } 3540 if (ac==NULL) { 3541 /* Didn't find one here! */ 3542 /* special case for RAID 1, especially 3543 where there are more than 2 3544 components (where RAIDframe treats 3545 things a little differently :( ) */ 3546 if (parity_type == '1') { 3547 if (c%2 == 0) { /* even component */ 3548 even_pair_failed = 1; 3549 } else { /* odd component. If 3550 we're failed, and 3551 so is the even 3552 component, it's 3553 "Good Night, Charlie" */ 3554 if (even_pair_failed == 1) { 3555 return(0); 3556 } 3557 } 3558 } else { 3559 /* normal accounting */ 3560 num_missing++; 3561 } 3562 } 3563 if ((parity_type == '1') && (c%2 == 1)) { 3564 /* Just did an even component, and we didn't 3565 bail.. reset the even_pair_failed flag, 3566 and go on to the next component.... 
*/ 3567 even_pair_failed = 0; 3568 } 3569 } 3570 3571 clabel = cset->ac->clabel; 3572 3573 if (((clabel->parityConfig == '0') && (num_missing > 0)) || 3574 ((clabel->parityConfig == '4') && (num_missing > 1)) || 3575 ((clabel->parityConfig == '5') && (num_missing > 1))) { 3576 /* XXX this needs to be made *much* more general */ 3577 /* Too many failures */ 3578 return(0); 3579 } 3580 /* otherwise, all is well, and we've got enough to take a kick 3581 at autoconfiguring this set */ 3582 return(1); 3583 } 3584 3585 void 3586 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config, 3587 RF_Raid_t *raidPtr) 3588 { 3589 RF_ComponentLabel_t *clabel; 3590 int i; 3591 3592 clabel = ac->clabel; 3593 3594 /* 1. Fill in the common stuff */ 3595 config->numRow = clabel->num_rows = 1; 3596 config->numCol = clabel->num_columns; 3597 config->numSpare = 0; /* XXX should this be set here? */ 3598 config->sectPerSU = clabel->sectPerSU; 3599 config->SUsPerPU = clabel->SUsPerPU; 3600 config->SUsPerRU = clabel->SUsPerRU; 3601 config->parityConfig = clabel->parityConfig; 3602 /* XXX... */ 3603 strcpy(config->diskQueueType,"fifo"); 3604 config->maxOutstandingDiskReqs = clabel->maxOutstanding; 3605 config->layoutSpecificSize = 0; /* XXX ?? 
*/ 3606 3607 while(ac!=NULL) { 3608 /* row/col values will be in range due to the checks 3609 in reasonable_label() */ 3610 strcpy(config->devnames[0][ac->clabel->column], 3611 ac->devname); 3612 ac = ac->next; 3613 } 3614 3615 for(i=0;i<RF_MAXDBGV;i++) { 3616 config->debugVars[i][0] = 0; 3617 } 3618 } 3619 3620 int 3621 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value) 3622 { 3623 RF_ComponentLabel_t *clabel; 3624 int column; 3625 int sparecol; 3626 3627 raidPtr->autoconfigure = new_value; 3628 3629 for(column=0; column<raidPtr->numCol; column++) { 3630 if (raidPtr->Disks[column].status == rf_ds_optimal) { 3631 clabel = raidget_component_label(raidPtr, column); 3632 clabel->autoconfigure = new_value; 3633 raidflush_component_label(raidPtr, column); 3634 } 3635 } 3636 for(column = 0; column < raidPtr->numSpare ; column++) { 3637 sparecol = raidPtr->numCol + column; 3638 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3639 clabel = raidget_component_label(raidPtr, sparecol); 3640 clabel->autoconfigure = new_value; 3641 raidflush_component_label(raidPtr, sparecol); 3642 } 3643 } 3644 return(new_value); 3645 } 3646 3647 int 3648 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value) 3649 { 3650 RF_ComponentLabel_t *clabel; 3651 int column; 3652 int sparecol; 3653 3654 raidPtr->root_partition = new_value; 3655 for(column=0; column<raidPtr->numCol; column++) { 3656 if (raidPtr->Disks[column].status == rf_ds_optimal) { 3657 clabel = raidget_component_label(raidPtr, column); 3658 clabel->root_partition = new_value; 3659 raidflush_component_label(raidPtr, column); 3660 } 3661 } 3662 for(column = 0; column < raidPtr->numSpare ; column++) { 3663 sparecol = raidPtr->numCol + column; 3664 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3665 clabel = raidget_component_label(raidPtr, sparecol); 3666 clabel->root_partition = new_value; 3667 raidflush_component_label(raidPtr, sparecol); 3668 } 3669 } 3670 return(new_value); 3671 } 3672 3673 void 3674 
rf_release_all_vps(RF_ConfigSet_t *cset) 3675 { 3676 RF_AutoConfig_t *ac; 3677 3678 ac = cset->ac; 3679 while(ac!=NULL) { 3680 /* Close the vp, and give it back */ 3681 if (ac->vp) { 3682 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); 3683 VOP_CLOSE(ac->vp, FREAD, NOCRED); 3684 vput(ac->vp); 3685 ac->vp = NULL; 3686 } 3687 ac = ac->next; 3688 } 3689 } 3690 3691 3692 void 3693 rf_cleanup_config_set(RF_ConfigSet_t *cset) 3694 { 3695 RF_AutoConfig_t *ac; 3696 RF_AutoConfig_t *next_ac; 3697 3698 ac = cset->ac; 3699 while(ac!=NULL) { 3700 next_ac = ac->next; 3701 /* nuke the label */ 3702 free(ac->clabel, M_RAIDFRAME); 3703 /* cleanup the config structure */ 3704 free(ac, M_RAIDFRAME); 3705 /* "next.." */ 3706 ac = next_ac; 3707 } 3708 /* and, finally, nuke the config set */ 3709 free(cset, M_RAIDFRAME); 3710 } 3711 3712 3713 void 3714 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel) 3715 { 3716 /* current version number */ 3717 clabel->version = RF_COMPONENT_LABEL_VERSION; 3718 clabel->serial_number = raidPtr->serial_number; 3719 clabel->mod_counter = raidPtr->mod_counter; 3720 3721 clabel->num_rows = 1; 3722 clabel->num_columns = raidPtr->numCol; 3723 clabel->clean = RF_RAID_DIRTY; /* not clean */ 3724 clabel->status = rf_ds_optimal; /* "It's good!" 
*/ 3725 3726 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; 3727 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU; 3728 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU; 3729 3730 clabel->blockSize = raidPtr->bytesPerSector; 3731 rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk); 3732 3733 /* XXX not portable */ 3734 clabel->parityConfig = raidPtr->Layout.map->parityConfig; 3735 clabel->maxOutstanding = raidPtr->maxOutstanding; 3736 clabel->autoconfigure = raidPtr->autoconfigure; 3737 clabel->root_partition = raidPtr->root_partition; 3738 clabel->last_unit = raidPtr->raidid; 3739 clabel->config_order = raidPtr->config_order; 3740 3741 #ifndef RF_NO_PARITY_MAP 3742 rf_paritymap_init_label(raidPtr->parity_map, clabel); 3743 #endif 3744 } 3745 3746 struct raid_softc * 3747 rf_auto_config_set(RF_ConfigSet_t *cset) 3748 { 3749 RF_Raid_t *raidPtr; 3750 RF_Config_t *config; 3751 int raidID; 3752 struct raid_softc *sc; 3753 3754 #ifdef DEBUG 3755 printf("RAID autoconfigure\n"); 3756 #endif 3757 3758 /* 1. Create a config structure */ 3759 config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO); 3760 if (config == NULL) { 3761 printf("Out of mem!?!?\n"); 3762 /* XXX do something more intelligent here. */ 3763 return NULL; 3764 } 3765 3766 /* 3767 2. Figure out what RAID ID this one is supposed to live at 3768 See if we can get the same RAID dev that it was configured 3769 on last time.. 3770 */ 3771 3772 raidID = cset->ac->clabel->last_unit; 3773 for (sc = raidget(raidID); sc->sc_r.valid != 0; sc = raidget(++raidID)) 3774 continue; 3775 #ifdef DEBUG 3776 printf("Configuring raid%d:\n",raidID); 3777 #endif 3778 3779 raidPtr = &sc->sc_r; 3780 3781 /* XXX all this stuff should be done SOMEWHERE ELSE! */ 3782 raidPtr->softc = sc; 3783 raidPtr->raidid = raidID; 3784 raidPtr->openings = RAIDOUTSTANDING; 3785 3786 /* 3. Build the configuration structure */ 3787 rf_create_configuration(cset->ac, config, raidPtr); 3788 3789 /* 4. 
Do the configuration */ 3790 if (rf_Configure(raidPtr, config, cset->ac) == 0) { 3791 raidinit(sc); 3792 3793 rf_markalldirty(raidPtr); 3794 raidPtr->autoconfigure = 1; /* XXX do this here? */ 3795 switch (cset->ac->clabel->root_partition) { 3796 case 1: /* Force Root */ 3797 case 2: /* Soft Root: root when boot partition part of raid */ 3798 /* 3799 * everything configured just fine. Make a note 3800 * that this set is eligible to be root, 3801 * or forced to be root 3802 */ 3803 cset->rootable = cset->ac->clabel->root_partition; 3804 /* XXX do this here? */ 3805 raidPtr->root_partition = cset->rootable; 3806 break; 3807 default: 3808 break; 3809 } 3810 } else { 3811 raidput(sc); 3812 sc = NULL; 3813 } 3814 3815 /* 5. Cleanup */ 3816 free(config, M_RAIDFRAME); 3817 return sc; 3818 } 3819 3820 void 3821 rf_disk_unbusy(RF_RaidAccessDesc_t *desc) 3822 { 3823 struct buf *bp; 3824 struct raid_softc *rs; 3825 3826 bp = (struct buf *)desc->bp; 3827 rs = desc->raidPtr->softc; 3828 disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid), 3829 (bp->b_flags & B_READ)); 3830 } 3831 3832 void 3833 rf_pool_init(struct pool *p, size_t size, const char *w_chan, 3834 size_t xmin, size_t xmax) 3835 { 3836 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO); 3837 pool_sethiwat(p, xmax); 3838 pool_prime(p, xmin); 3839 pool_setlowat(p, xmin); 3840 } 3841 3842 /* 3843 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see 3844 * if there is IO pending and if that IO could possibly be done for a 3845 * given RAID set. Returns 0 if IO is waiting and can be done, 1 3846 * otherwise. 
3847 * 3848 */ 3849 3850 int 3851 rf_buf_queue_check(RF_Raid_t *raidPtr) 3852 { 3853 struct raid_softc *rs = raidPtr->softc; 3854 if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) { 3855 /* there is work to do */ 3856 return 0; 3857 } 3858 /* default is nothing to do */ 3859 return 1; 3860 } 3861 3862 int 3863 rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr) 3864 { 3865 uint64_t numsecs; 3866 unsigned secsize; 3867 int error; 3868 3869 error = getdisksize(vp, &numsecs, &secsize); 3870 if (error == 0) { 3871 diskPtr->blockSize = secsize; 3872 diskPtr->numBlocks = numsecs - rf_protectedSectors; 3873 diskPtr->partitionSize = numsecs; 3874 return 0; 3875 } 3876 return error; 3877 } 3878 3879 static int 3880 raid_match(device_t self, cfdata_t cfdata, void *aux) 3881 { 3882 return 1; 3883 } 3884 3885 static void 3886 raid_attach(device_t parent, device_t self, void *aux) 3887 { 3888 3889 } 3890 3891 3892 static int 3893 raid_detach(device_t self, int flags) 3894 { 3895 int error; 3896 struct raid_softc *rs = raidget(device_unit(self)); 3897 3898 if (rs == NULL) 3899 return ENXIO; 3900 3901 if ((error = raidlock(rs)) != 0) 3902 return (error); 3903 3904 error = raid_detach_unlocked(rs); 3905 3906 raidunlock(rs); 3907 3908 /* XXXkd: raidput(rs) ??? */ 3909 3910 return error; 3911 } 3912 3913 static void 3914 rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr) 3915 { 3916 struct disk_geom *dg = &rs->sc_dkdev.dk_geom; 3917 3918 memset(dg, 0, sizeof(*dg)); 3919 3920 dg->dg_secperunit = raidPtr->totalSectors; 3921 dg->dg_secsize = raidPtr->bytesPerSector; 3922 dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe; 3923 dg->dg_ntracks = 4 * raidPtr->numCol; 3924 3925 disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL); 3926 } 3927 3928 /* 3929 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components. 3930 * We end up returning whatever error was returned by the first cache flush 3931 * that fails. 
3932 */ 3933 3934 int 3935 rf_sync_component_caches(RF_Raid_t *raidPtr) 3936 { 3937 int c, sparecol; 3938 int e,error; 3939 int force = 1; 3940 3941 error = 0; 3942 for (c = 0; c < raidPtr->numCol; c++) { 3943 if (raidPtr->Disks[c].status == rf_ds_optimal) { 3944 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC, 3945 &force, FWRITE, NOCRED); 3946 if (e) { 3947 if (e != ENODEV) 3948 printf("raid%d: cache flush to component %s failed.\n", 3949 raidPtr->raidid, raidPtr->Disks[c].devname); 3950 if (error == 0) { 3951 error = e; 3952 } 3953 } 3954 } 3955 } 3956 3957 for( c = 0; c < raidPtr->numSpare ; c++) { 3958 sparecol = raidPtr->numCol + c; 3959 /* Need to ensure that the reconstruct actually completed! */ 3960 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3961 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp, 3962 DIOCCACHESYNC, &force, FWRITE, NOCRED); 3963 if (e) { 3964 if (e != ENODEV) 3965 printf("raid%d: cache flush to component %s failed.\n", 3966 raidPtr->raidid, raidPtr->Disks[sparecol].devname); 3967 if (error == 0) { 3968 error = e; 3969 } 3970 } 3971 } 3972 } 3973 return error; 3974 } 3975